# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from node import nullid, short
from i18n import _
import os
import revlog, util, error

def verify(repo):
    lock = repo.lock()
    try:
        return _verify(repo)
    finally:
        lock.release()

def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while '//' in f:
        f = f.replace('//', '/')
    return f

def _verify(repo):
    repo = repo.unfiltered()
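    # mflinkrevs: manifest node -> changelog revs referencing it
    # filelinkrevs: file name -> changelog revs touching it
    # filenodes: file name -> {file node: linkrev} gathered from manifests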
    mflinkrevs = {}
    filelinkrevs = {}
    filenodes = {}
    revisions = 0
    badrevs = set()
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.cancopy():
        raise util.Abort(_("cannot verify bundle or remote repos"))

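    # record an integrity error; remember the linkrev so the first
    # damaged changeset can be reported at the end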
    def err(linkrev, msg, filename=None):
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

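    # report an unexpected exception as an integrity error
    # (KeyboardInterrupt is re-raised)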
    def exc(linkrev, msg, inst, filename=None):
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            raise
        if not str(inst):
            inst = repr(inst)
        err(linkrev, "%s: %s" % (msg, inst), filename)

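    # record a non-fatal warning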
    def warn(msg):
        ui.warn(msg + "\n")
        warnings[0] += 1

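    # sanity-check a whole revlog: emptiness, data/index sizes and
    # revlog format version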
    def checklog(obj, name, linkrev):
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

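    # check a single revlog entry: linkrev, parents and duplicates;
    # returns the linkrev, or None if it cannot be trusted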
    def checkentry(obj, i, node, seen, linkrevs, f):
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception, inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    if os.path.exists(repo.sjoin("journal")):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                       (revlogv1 and 1 or 0))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    ui.status(_("checking changesets\n"))
    refersmf = False
    seen = {}
    checklog(cl, "changelog", 0)
    total = len(repo)
    for i in repo:
        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

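        # note which manifest and files this changeset refers to, for
        # the manifest and file passes below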
        try:
            changes = cl.read(n)
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(i)
                refersmf = True
            for f in changes[3]:
                filelinkrevs.setdefault(_normpath(f), []).append(i)
        except Exception, inst:
            refersmf = True
            exc(i, _("unpacking changeset %s") % short(n), inst)
    ui.progress(_('checking'), None)

    ui.status(_("checking manifests\n"))
    seen = {}
    if refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        checklog(mf, "manifest", 0)
    total = len(mf)
    for i in mf:
        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

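        # remember every file node this manifest mentions so the
        # filelogs can be crosschecked later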
        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null":
                    filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
        except Exception, inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)
    ui.progress(_('checking'), None)

    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
    count = 0
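    # whatever is left in mflinkrevs was referenced by a changeset but
    # never seen while walking the manifest log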
    if havemf:
        for c, m in sorted([(c, m) for m in mflinkrevs
                            for c in mflinkrevs[m]]):
            count += 1
            if m == nullid:
                continue
            ui.progress(_('crosschecking'), count, total=total)
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.progress(_('crosschecking'), None)

    ui.status(_("checking files\n"))

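    # all data files present in the store; entries are removed as their
    # filelogs are visited, and anything left over is reported as an
    # orphan revlog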
    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0 or not revlogv1:
            storefiles.add(_normpath(f))

    files = sorted(set(filenodes) | set(filelinkrevs))
    total = len(files)
    for i, f in enumerate(files):
        ui.progress(_('checking'), i, item=f, total=total)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.RevlogError, e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                err(lr, _("missing revlog!"), ff)

        checklog(fl, f, lr)
        seen = {}
        rp = None
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            # verify contents
            try:
                l = len(fl.read(n))
                rp = fl.renamed(n)
                if l != fl.size(i):
                    if len(fl.revision(n)) != fl.size(i):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (l, fl.size(i)), f)
            except Exception, inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        found = False
                        for pctx in ctx.parents():
                            if rp[0] in pctx:
                                found = True
                                break
                        if not found:
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                            % (f, lr, rp[0], short(rp[1])))
                    else:
                        fl2.rev(rp[1])
            except Exception, inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check
        if f in filenodes:
            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                err(lr, _("%s in manifests not found") % short(node), f)
    ui.progress(_('checking'), None)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    ui.status(_("%d files, %d changesets, %d total revisions\n") %
                   (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1