mercurial/verify.py
author Martin Geisler <mg@lazybytes.net>
Wed, 10 Jun 2009 01:09:21 +0200
changeset 8774 dbb258564651
parent 8466 afb3e504b558
child 8993 46441934c585
permissions -rw-r--r--
hg.1: simplify synopsis Now that arguments can be mixed with options we can simplify the synopsis considerably. Also, highlighting the command name in bold (instead of italics) seem to be the standard convention.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     1
# verify.py - repository integrity checking for Mercurial
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     2
#
4635
63b9d2deed48 Updated copyright notices and add "and others" to "hg version"
Thomas Arendsen Hein <thomas@intevation.de>
parents: 4395
diff changeset
     3
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     4
#
8225
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 8209
diff changeset
     5
# This software may be used and distributed according to the terms of the
46293a0c7e9f updated license to be explicit about GPL version 2
Martin Geisler <mg@lazybytes.net>
parents: 8209
diff changeset
     6
# GNU General Public License version 2, incorporated herein by reference.
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     7
6211
f89fd07fc51d Expand import * to allow Pyflakes to find problems
Joel Rosdahl <joel@rosdahl.net>
parents: 5541
diff changeset
     8
from node import nullid, short
3891
6b4127c7d52a Simplify i18n imports
Matt Mackall <mpm@selenic.com>
parents: 3744
diff changeset
     9
from i18n import _
7832
1fce19b9b011 verify: do not abort on fully corrupted revlog
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7676
diff changeset
    10
import revlog, util, error
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    11
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    12
def verify(repo):
4915
97b734fb9c6f Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents: 4635
diff changeset
    13
    lock = repo.lock()
97b734fb9c6f Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents: 4635
diff changeset
    14
    try:
97b734fb9c6f Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents: 4635
diff changeset
    15
        return _verify(repo)
97b734fb9c6f Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents: 4635
diff changeset
    16
    finally:
8109
496ae1ea4698 switch lock releasing in the core from gc to explicit
Ronny Pfannschmidt <Ronny.Pfannschmidt@gmx.de>
parents: 7926
diff changeset
    17
        lock.release()
4915
97b734fb9c6f Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents: 4635
diff changeset
    18
97b734fb9c6f Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents: 4635
diff changeset
    19
def _verify(repo):
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    20
    mflinkrevs = {}
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    21
    filelinkrevs = {}
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    22
    filenodes = {}
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    23
    revisions = 0
8466
afb3e504b558 verify: use set instead of dict
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8394
diff changeset
    24
    badrevs = set()
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    25
    errors = [0]
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    26
    warnings = [0]
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
    27
    ui = repo.ui
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
    28
    cl = repo.changelog
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
    29
    mf = repo.manifest
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    30
7141
8d1bdaf842de issue 1144: prevent traceback on verify of bundles
John Mulligan <phlogistonjohn@asynchrono.us>
parents: 7004
diff changeset
    31
    if not repo.cancopy():
8d1bdaf842de issue 1144: prevent traceback on verify of bundles
John Mulligan <phlogistonjohn@asynchrono.us>
parents: 7004
diff changeset
    32
        raise util.Abort(_("cannot verify bundle or remote repos"))
8d1bdaf842de issue 1144: prevent traceback on verify of bundles
John Mulligan <phlogistonjohn@asynchrono.us>
parents: 7004
diff changeset
    33
5313
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    34
    def err(linkrev, msg, filename=None):
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    35
        if linkrev != None:
8466
afb3e504b558 verify: use set instead of dict
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8394
diff changeset
    36
            badrevs.add(linkrev)
5313
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    37
        else:
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    38
            linkrev = '?'
5313
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    39
        msg = "%s: %s" % (linkrev, msg)
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    40
        if filename:
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    41
            msg = "%s@%s" % (filename, msg)
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
    42
        ui.warn(" " + msg + "\n")
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    43
        errors[0] += 1
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    44
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    45
    def exc(linkrev, msg, inst, filename=None):
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    46
        if isinstance(inst, KeyboardInterrupt):
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    47
            ui.warn(_("interrupted"))
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    48
            raise
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    49
        err(linkrev, "%s: %s" % (msg, inst), filename)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    50
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    51
    def warn(msg):
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
    52
        ui.warn(msg + "\n")
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    53
        warnings[0] += 1
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    54
8292
29540554def8 verify: reference the correct linkrev when a filelog is missing
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8291
diff changeset
    55
    def checklog(obj, name, linkrev):
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    56
        if not len(obj) and (havecl or havemf):
8292
29540554def8 verify: reference the correct linkrev when a filelog is missing
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8291
diff changeset
    57
            err(linkrev, _("empty or missing %s") % name)
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    58
            return
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    59
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    60
        d = obj.checksize()
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    61
        if d[0]:
5313
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    62
            err(None, _("data length off by %d bytes") % d[0], name)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    63
        if d[1]:
5313
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
    64
            err(None, _("index contains %d extra bytes") % d[1], name)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    65
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    66
        if obj.version != revlog.REVLOGV0:
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    67
            if not revlogv1:
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    68
                warn(_("warning: `%s' uses revlog format 1") % name)
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    69
        elif revlogv1:
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    70
            warn(_("warning: `%s' uses revlog format 0") % name)
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    71
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    72
    def checkentry(obj, i, node, seen, linkrevs, f):
7361
9fe97eea5510 linkrev: take a revision number rather than a hash
Matt Mackall <mpm@selenic.com>
parents: 7141
diff changeset
    73
        lr = obj.linkrev(obj.rev(node))
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    74
        if lr < 0 or (havecl and lr not in linkrevs):
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    75
            if lr < 0 or lr >= len(cl):
7926
edf2d83a11aa verify, i18n: fix unmarked strings
Wagner Bruna <wbruna@yahoo.com>
parents: 7874
diff changeset
    76
                msg = _("rev %d points to nonexistent changeset %d")
edf2d83a11aa verify, i18n: fix unmarked strings
Wagner Bruna <wbruna@yahoo.com>
parents: 7874
diff changeset
    77
            else:
edf2d83a11aa verify, i18n: fix unmarked strings
Wagner Bruna <wbruna@yahoo.com>
parents: 7874
diff changeset
    78
                msg = _("rev %d points to unexpected changeset %d")
edf2d83a11aa verify, i18n: fix unmarked strings
Wagner Bruna <wbruna@yahoo.com>
parents: 7874
diff changeset
    79
            err(None, msg % (i, lr), f)
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    80
            if linkrevs:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    81
                warn(_(" (expected %s)") % " ".join(map(str,linkrevs)))
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    82
            lr = None # can't be trusted
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    83
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    84
        try:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    85
            p1, p2 = obj.parents(node)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    86
            if p1 not in seen and p1 != nullid:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    87
                err(lr, _("unknown parent 1 %s of %s") %
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    88
                    (short(p1), short(n)), f)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    89
            if p2 not in seen and p2 != nullid:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    90
                err(lr, _("unknown parent 2 %s of %s") %
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    91
                    (short(p2), short(p1)), f)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    92
        except Exception, inst:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    93
            exc(lr, _("checking parents of %s") % short(node), inst, f)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    94
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    95
        if node in seen:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    96
            err(lr, _("duplicate revision %d (%d)") % (i, seen[n]), f)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    97
        seen[n] = i
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    98
        return lr
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
    99
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   100
    revlogv1 = cl.version != revlog.REVLOGV0
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   101
    if ui.verbose or not revlogv1:
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   102
        ui.status(_("repository uses revlog format %d\n") %
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   103
                       (revlogv1 and 1 or 0))
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   104
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   105
    havecl = len(cl) > 0
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   106
    havemf = len(mf) > 0
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   107
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   108
    ui.status(_("checking changesets\n"))
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   109
    seen = {}
8292
29540554def8 verify: reference the correct linkrev when a filelog is missing
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8291
diff changeset
   110
    checklog(cl, "changelog", 0)
6750
fb42030d79d6 add __len__ and __iter__ methods to repo and revlog
Matt Mackall <mpm@selenic.com>
parents: 6534
diff changeset
   111
    for i in repo:
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   112
        n = cl.node(i)
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   113
        checkentry(cl, i, n, seen, [i], "changelog")
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   114
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   115
        try:
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   116
            changes = cl.read(n)
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   117
            mflinkrevs.setdefault(changes[0], []).append(i)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   118
            for f in changes[3]:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   119
                filelinkrevs.setdefault(f, []).append(i)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   120
        except Exception, inst:
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   121
            exc(i, _("unpacking changeset %s") % short(n), inst)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   122
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   123
    ui.status(_("checking manifests\n"))
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   124
    seen = {}
8292
29540554def8 verify: reference the correct linkrev when a filelog is missing
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8291
diff changeset
   125
    checklog(mf, "manifest", 0)
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   126
    for i in mf:
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   127
        n = mf.node(i)
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   128
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   129
        if n in mflinkrevs:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   130
            del mflinkrevs[n]
8394
850b5a7c210d verify: detect manifest revs not in any changeset
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 8292
diff changeset
   131
        else:
850b5a7c210d verify: detect manifest revs not in any changeset
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 8292
diff changeset
   132
            err(lr, _("%s not in changesets") % short(n), "manifest")
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   133
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   134
        try:
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   135
            for f, fn in mf.readdelta(n).iteritems():
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   136
                if not f:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   137
                    err(lr, _("file without name in manifest"))
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   138
                elif f != "/dev/null":
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   139
                    fns = filenodes.setdefault(f, {})
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   140
                    if fn not in fns:
7361
9fe97eea5510 linkrev: take a revision number rather than a hash
Matt Mackall <mpm@selenic.com>
parents: 7141
diff changeset
   141
                        fns[fn] = i
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   142
        except Exception, inst:
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   143
            exc(lr, _("reading manifest delta %s") % short(n), inst)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   144
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   145
    ui.status(_("crosschecking files in changesets and manifests\n"))
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   146
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   147
    if havemf:
8209
a1a5a57efe90 replace util.sort with sorted built-in
Matt Mackall <mpm@selenic.com>
parents: 8164
diff changeset
   148
        for c,m in sorted([(c, m) for m in mflinkrevs for c in mflinkrevs[m]]):
5541
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   149
            err(c, _("changeset refers to unknown manifest %s") % short(m))
6762
f67d1468ac50 util: add sort helper
Matt Mackall <mpm@selenic.com>
parents: 6752
diff changeset
   150
        del mflinkrevs
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   151
8209
a1a5a57efe90 replace util.sort with sorted built-in
Matt Mackall <mpm@selenic.com>
parents: 8164
diff changeset
   152
        for f in sorted(filelinkrevs):
5541
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   153
            if f not in filenodes:
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   154
                lr = filelinkrevs[f][0]
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   155
                err(lr, _("in changeset but not in manifest"), f)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   156
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   157
    if havecl:
8209
a1a5a57efe90 replace util.sort with sorted built-in
Matt Mackall <mpm@selenic.com>
parents: 8164
diff changeset
   158
        for f in sorted(filenodes):
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   159
            if f not in filelinkrevs:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   160
                try:
7361
9fe97eea5510 linkrev: take a revision number rather than a hash
Matt Mackall <mpm@selenic.com>
parents: 7141
diff changeset
   161
                    fl = repo.file(f)
9fe97eea5510 linkrev: take a revision number rather than a hash
Matt Mackall <mpm@selenic.com>
parents: 7141
diff changeset
   162
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   163
                except:
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   164
                    lr = None
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   165
                err(lr, _("in manifest but not in changeset"), f)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   166
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   167
    ui.status(_("checking files\n"))
6892
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   168
8466
afb3e504b558 verify: use set instead of dict
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8394
diff changeset
   169
    storefiles = set()
6900
def492d1b592 store: change handling of decoding errors
Matt Mackall <mpm@selenic.com>
parents: 6892
diff changeset
   170
    for f, f2, size in repo.store.datafiles():
def492d1b592 store: change handling of decoding errors
Matt Mackall <mpm@selenic.com>
parents: 6892
diff changeset
   171
        if not f:
def492d1b592 store: change handling of decoding errors
Matt Mackall <mpm@selenic.com>
parents: 6892
diff changeset
   172
            err(None, _("cannot decode filename '%s'") % f2)
def492d1b592 store: change handling of decoding errors
Matt Mackall <mpm@selenic.com>
parents: 6892
diff changeset
   173
        elif size > 0:
8466
afb3e504b558 verify: use set instead of dict
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8394
diff changeset
   174
            storefiles.add(f)
6892
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   175
8209
a1a5a57efe90 replace util.sort with sorted built-in
Matt Mackall <mpm@selenic.com>
parents: 8164
diff changeset
   176
    files = sorted(set(filenodes) | set(filelinkrevs))
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   177
    for f in files:
8291
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   178
        try:
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   179
            linkrevs = filelinkrevs[f]
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   180
        except KeyError:
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   181
            # in manifest but not in changelog
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   182
            linkrevs = []
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   183
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   184
        if linkrevs:
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   185
            lr = linkrevs[0]
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   186
        else:
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   187
            lr = None
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   188
7832
1fce19b9b011 verify: do not abort on fully corrupted revlog
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7676
diff changeset
   189
        try:
1fce19b9b011 verify: do not abort on fully corrupted revlog
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7676
diff changeset
   190
            fl = repo.file(f)
1fce19b9b011 verify: do not abort on fully corrupted revlog
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7676
diff changeset
   191
        except error.RevlogError, e:
7833
794def2fe232 verify: find correct first corrupted cset for missing/corrupted revlogs
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7832
diff changeset
   192
            err(lr, _("broken revlog! (%s)") % e, f)
7832
1fce19b9b011 verify: do not abort on fully corrupted revlog
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7676
diff changeset
   193
            continue
6892
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   194
6900
def492d1b592 store: change handling of decoding errors
Matt Mackall <mpm@selenic.com>
parents: 6892
diff changeset
   195
        for ff in fl.files():
6892
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   196
            try:
8466
afb3e504b558 verify: use set instead of dict
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8394
diff changeset
   197
                storefiles.remove(ff)
6892
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   198
            except KeyError:
7833
794def2fe232 verify: find correct first corrupted cset for missing/corrupted revlogs
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 7832
diff changeset
   199
                err(lr, _("missing revlog!"), ff)
6892
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   200
8292
29540554def8 verify: reference the correct linkrev when a filelog is missing
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 8291
diff changeset
   201
        checklog(fl, f, lr)
5313
29be4228303b verify: report first bad changeset
Matt Mackall <mpm@selenic.com>
parents: 5179
diff changeset
   202
        seen = {}
6750
fb42030d79d6 add __len__ and __iter__ methods to repo and revlog
Matt Mackall <mpm@selenic.com>
parents: 6534
diff changeset
   203
        for i in fl:
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   204
            revisions += 1
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   205
            n = fl.node(i)
8291
f5c1a9094e41 verify: avoid exception on missing file revlog
Henrik Stuart <hg@hstuart.dk>
parents: 8225
diff changeset
   206
            lr = checkentry(fl, i, n, seen, linkrevs, f)
5541
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   207
            if f in filenodes:
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   208
                if havemf and n not in filenodes[f]:
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   209
                    err(lr, _("%s not in manifests") % (short(n)), f)
5541
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   210
                else:
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   211
                    del filenodes[f][n]
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   212
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   213
            # verify contents
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   214
            try:
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   215
                t = fl.read(n)
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   216
                rp = fl.renamed(n)
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   217
                if len(t) != fl.size(i):
7675
011e69b96c69 verify: don't trip over binary files starting with 01 0a
Matt Mackall <mpm@selenic.com>
parents: 7361
diff changeset
   218
                    if len(fl.revision(n)) != fl.size(i):
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   219
                        err(lr, _("unpacked size is %s, %s expected") %
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   220
                            (len(t), fl.size(i)), f)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   221
            except Exception, inst:
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   222
                exc(lr, _("unpacking %s") % short(n), inst, f)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   223
3744
d626fc9e3985 verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents: 3473
diff changeset
   224
            # check renames
d626fc9e3985 verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents: 3473
diff changeset
   225
            try:
d626fc9e3985 verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents: 3473
diff changeset
   226
                if rp:
d626fc9e3985 verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents: 3473
diff changeset
   227
                    fl2 = repo.file(rp[0])
6750
fb42030d79d6 add __len__ and __iter__ methods to repo and revlog
Matt Mackall <mpm@selenic.com>
parents: 6534
diff changeset
   228
                    if not len(fl2):
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   229
                        err(lr, _("empty or missing copy source revlog %s:%s")
6534
9b35a9f34675 verify: check copy source revlog and nodeid
Patrick Mezard <pmezard@gmail.com>
parents: 6211
diff changeset
   230
                            % (rp[0], short(rp[1])), f)
9b35a9f34675 verify: check copy source revlog and nodeid
Patrick Mezard <pmezard@gmail.com>
parents: 6211
diff changeset
   231
                    elif rp[1] == nullid:
7004
90227c42b5f6 c0bd7d8b69ef uses err() instead of warn() but prototype doesn't match
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 6900
diff changeset
   232
                        warn(_("warning: %s@%s: copy source revision is nullid %s:%s")
90227c42b5f6 c0bd7d8b69ef uses err() instead of warn() but prototype doesn't match
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 6900
diff changeset
   233
                            % (f, lr, rp[0], short(rp[1])))
6534
9b35a9f34675 verify: check copy source revlog and nodeid
Patrick Mezard <pmezard@gmail.com>
parents: 6211
diff changeset
   234
                    else:
7874
d812029cda85 cleanup: drop variables for unused return values
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 7833
diff changeset
   235
                        fl2.rev(rp[1])
3744
d626fc9e3985 verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents: 3473
diff changeset
   236
            except Exception, inst:
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   237
                exc(lr, _("checking rename of %s") % short(n), inst, f)
3744
d626fc9e3985 verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents: 3473
diff changeset
   238
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   239
        # cross-check
5541
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   240
        if f in filenodes:
7622
4dd7b28003d2 use dict.iteritems() rather than dict.items()
Dirkjan Ochtman <dirkjan@ochtman.nl>
parents: 7361
diff changeset
   241
            fns = [(mf.linkrev(l), n) for n,l in filenodes[f].iteritems()]
8209
a1a5a57efe90 replace util.sort with sorted built-in
Matt Mackall <mpm@selenic.com>
parents: 8164
diff changeset
   242
            for lr, node in sorted(fns):
5541
ceaa752fa316 verify: improve handling of empty or missing files
Matt Mackall <mpm@selenic.com>
parents: 5313
diff changeset
   243
                err(lr, _("%s in manifests not found") % short(node), f)
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   244
6892
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   245
    for f in storefiles:
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   246
        warn(_("warning: orphan revlog '%s'") % f)
dab95717058d verify: check repo.store
Adrian Buehlmann <adrian@cadifra.com>
parents: 6889
diff changeset
   247
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   248
    ui.status(_("%d files, %d changesets, %d total revisions\n") %
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   249
                   (len(files), len(cl), revisions))
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   250
    if warnings[0]:
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   251
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   252
    if errors[0]:
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   253
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   254
        if badrevs:
6751
7424a75f919a verify: add some local variables
Matt Mackall <mpm@selenic.com>
parents: 6750
diff changeset
   255
            ui.warn(_("(first damaged changeset appears to be %d)\n")
6752
e79a8f36c2a5 verify: lots of refactoring
Matt Mackall <mpm@selenic.com>
parents: 6751
diff changeset
   256
                    % min(badrevs))
2778
fdc232d8a193 Move repo.verify
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   257
        return 1