author | Matt Mackall <mpm@selenic.com> |
Mon, 20 Aug 2007 21:10:45 -0500 | |
changeset 5209 | bbdcdc7f170e |
parent 5179 | 2da57dc04aa8 |
child 5313 | 29be4228303b |
permissions | -rw-r--r-- |
2778 | 1 |
# verify.py - repository integrity checking for Mercurial |
2 |
# |
|
4635
63b9d2deed48
Updated copyright notices and add "and others" to "hg version"
Thomas Arendsen Hein <thomas@intevation.de>
parents:
4395
diff
changeset
|
3 |
# Copyright 2006, 2007 Matt Mackall <mpm@selenic.com> |
2778 | 4 |
# |
5 |
# This software may be used and distributed according to the terms |
|
6 |
# of the GNU General Public License, incorporated herein by reference. |
|
7 |
||
8 |
from node import * |
|
3891 | 9 |
from i18n import _ |
5175
012dbf88b9b2
remove unneeded imports of mdiff
Matt Mackall <mpm@selenic.com>
parents:
4915
diff
changeset
|
10 |
import revlog |
2778 | 11 |
|
12 |
def verify(repo):
    """Run the repository integrity check while holding the repo lock.

    Acquires repo.lock(), delegates the actual work to _verify(repo) and
    hands back whatever _verify returned.  The lock reference is dropped
    in a finally clause so it goes away whether or not the check raises.
    """
    repolock = repo.lock()
    try:
        result = _verify(repo)
    finally:
        # Dropping the only reference is the cleanup step here; presumably
        # the lock object releases itself when it is collected.
        del repolock
    return result
97b734fb9c6f
Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents:
4635
diff
changeset
|
18 |
|
97b734fb9c6f
Use try/finally pattern to cleanup locks and transactions
Matt Mackall <mpm@selenic.com>
parents:
4635
diff
changeset
|
19 |
def _verify(repo): |
2778 | 20 |
filelinkrevs = {} |
21 |
filenodes = {} |
|
22 |
changesets = revisions = files = 0 |
|
23 |
errors = [0] |
|
24 |
warnings = [0] |
|
25 |
neededmanifests = {} |
|
26 |
||
27 |
def err(msg): |
|
28 |
repo.ui.warn(msg + "\n") |
|
29 |
errors[0] += 1 |
|
30 |
||
31 |
def warn(msg): |
|
32 |
repo.ui.warn(msg + "\n") |
|
33 |
warnings[0] += 1 |
|
34 |
||
35 |
def checksize(obj, name): |
|
36 |
d = obj.checksize() |
|
37 |
if d[0]: |
|
38 |
err(_("%s data length off by %d bytes") % (name, d[0])) |
|
39 |
if d[1]: |
|
40 |
err(_("%s index contains %d extra bytes") % (name, d[1])) |
|
41 |
||
42 |
def checkversion(obj, name): |
|
43 |
if obj.version != revlog.REVLOGV0: |
|
44 |
if not revlogv1: |
|
45 |
warn(_("warning: `%s' uses revlog format 1") % name) |
|
46 |
elif revlogv1: |
|
47 |
warn(_("warning: `%s' uses revlog format 0") % name) |
|
48 |
||
4258
b11a2fb59cf5
revlog: simplify revlog version handling
Matt Mackall <mpm@selenic.com>
parents:
3891
diff
changeset
|
49 |
revlogv1 = repo.changelog.version != revlog.REVLOGV0 |
b11a2fb59cf5
revlog: simplify revlog version handling
Matt Mackall <mpm@selenic.com>
parents:
3891
diff
changeset
|
50 |
if repo.ui.verbose or not revlogv1: |
2778 | 51 |
repo.ui.status(_("repository uses revlog format %d\n") % |
52 |
(revlogv1 and 1 or 0)) |
|
53 |
||
54 |
seen = {} |
|
55 |
repo.ui.status(_("checking changesets\n")) |
|
56 |
checksize(repo.changelog, "changelog") |
|
57 |
||
3473
0e68608bd11d
use xrange instead of range
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
3196
diff
changeset
|
58 |
for i in xrange(repo.changelog.count()): |
2778 | 59 |
changesets += 1 |
60 |
n = repo.changelog.node(i) |
|
61 |
l = repo.changelog.linkrev(n) |
|
62 |
if l != i: |
|
63 |
err(_("incorrect link (%d) for changeset revision %d") %(l, i)) |
|
64 |
if n in seen: |
|
65 |
err(_("duplicate changeset at revision %d") % i) |
|
66 |
seen[n] = 1 |
|
67 |
||
68 |
for p in repo.changelog.parents(n): |
|
69 |
if p not in repo.changelog.nodemap: |
|
70 |
err(_("changeset %s has unknown parent %s") % |
|
71 |
(short(n), short(p))) |
|
72 |
try: |
|
73 |
changes = repo.changelog.read(n) |
|
74 |
except KeyboardInterrupt: |
|
75 |
repo.ui.warn(_("interrupted")) |
|
76 |
raise |
|
77 |
except Exception, inst: |
|
78 |
err(_("unpacking changeset %s: %s") % (short(n), inst)) |
|
79 |
continue |
|
80 |
||
81 |
neededmanifests[changes[0]] = n |
|
82 |
||
83 |
for f in changes[3]: |
|
84 |
filelinkrevs.setdefault(f, []).append(i) |
|
85 |
||
86 |
seen = {} |
|
87 |
repo.ui.status(_("checking manifests\n")) |
|
88 |
checkversion(repo.manifest, "manifest") |
|
89 |
checksize(repo.manifest, "manifest") |
|
90 |
||
3473
0e68608bd11d
use xrange instead of range
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
3196
diff
changeset
|
91 |
for i in xrange(repo.manifest.count()): |
2778 | 92 |
n = repo.manifest.node(i) |
93 |
l = repo.manifest.linkrev(n) |
|
94 |
||
95 |
if l < 0 or l >= repo.changelog.count(): |
|
96 |
err(_("bad manifest link (%d) at revision %d") % (l, i)) |
|
97 |
||
98 |
if n in neededmanifests: |
|
99 |
del neededmanifests[n] |
|
100 |
||
101 |
if n in seen: |
|
102 |
err(_("duplicate manifest at revision %d") % i) |
|
103 |
||
104 |
seen[n] = 1 |
|
105 |
||
106 |
for p in repo.manifest.parents(n): |
|
107 |
if p not in repo.manifest.nodemap: |
|
108 |
err(_("manifest %s has unknown parent %s") % |
|
109 |
(short(n), short(p))) |
|
110 |
||
111 |
try: |
|
3196
f3b939444c72
Abstract manifest block parsing.
Brendan Cully <brendan@kublai.com>
parents:
2778
diff
changeset
|
112 |
for f, fn in repo.manifest.readdelta(n).iteritems(): |
f3b939444c72
Abstract manifest block parsing.
Brendan Cully <brendan@kublai.com>
parents:
2778
diff
changeset
|
113 |
filenodes.setdefault(f, {})[fn] = 1 |
2778 | 114 |
except KeyboardInterrupt: |
115 |
repo.ui.warn(_("interrupted")) |
|
116 |
raise |
|
117 |
except Exception, inst: |
|
3196
f3b939444c72
Abstract manifest block parsing.
Brendan Cully <brendan@kublai.com>
parents:
2778
diff
changeset
|
118 |
err(_("reading delta for manifest %s: %s") % (short(n), inst)) |
2778 | 119 |
continue |
120 |
||
121 |
repo.ui.status(_("crosschecking files in changesets and manifests\n")) |
|
122 |
||
123 |
for m, c in neededmanifests.items(): |
|
124 |
err(_("Changeset %s refers to unknown manifest %s") % |
|
125 |
(short(m), short(c))) |
|
126 |
del neededmanifests |
|
127 |
||
128 |
for f in filenodes: |
|
129 |
if f not in filelinkrevs: |
|
130 |
err(_("file %s in manifest but not in changesets") % f) |
|
131 |
||
132 |
for f in filelinkrevs: |
|
133 |
if f not in filenodes: |
|
134 |
err(_("file %s in changeset but not in manifest") % f) |
|
135 |
||
136 |
repo.ui.status(_("checking files\n")) |
|
137 |
ff = filenodes.keys() |
|
138 |
ff.sort() |
|
139 |
for f in ff: |
|
140 |
if f == "/dev/null": |
|
141 |
continue |
|
142 |
files += 1 |
|
143 |
if not f: |
|
144 |
err(_("file without name in manifest %s") % short(n)) |
|
145 |
continue |
|
146 |
fl = repo.file(f) |
|
147 |
checkversion(fl, f) |
|
148 |
checksize(fl, f) |
|
149 |
||
150 |
nodes = {nullid: 1} |
|
151 |
seen = {} |
|
3473
0e68608bd11d
use xrange instead of range
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
3196
diff
changeset
|
152 |
for i in xrange(fl.count()): |
2778 | 153 |
revisions += 1 |
154 |
n = fl.node(i) |
|
155 |
||
156 |
if n in seen: |
|
157 |
err(_("%s: duplicate revision %d") % (f, i)) |
|
158 |
if n not in filenodes[f]: |
|
159 |
err(_("%s: %d:%s not in manifests") % (f, i, short(n))) |
|
160 |
else: |
|
161 |
del filenodes[f][n] |
|
162 |
||
163 |
flr = fl.linkrev(n) |
|
164 |
if flr not in filelinkrevs.get(f, []): |
|
165 |
err(_("%s:%s points to unexpected changeset %d") |
|
166 |
% (f, short(n), flr)) |
|
5179
2da57dc04aa8
verify: report expected linkrev
Matt Mackall <mpm@selenic.com>
parents:
5175
diff
changeset
|
167 |
err(_("expecting one of %s" % filelinkrevs.get(f, []))) |
2778 | 168 |
else: |
169 |
filelinkrevs[f].remove(flr) |
|
170 |
||
171 |
# verify contents |
|
172 |
try: |
|
173 |
t = fl.read(n) |
|
174 |
except KeyboardInterrupt: |
|
175 |
repo.ui.warn(_("interrupted")) |
|
176 |
raise |
|
177 |
except Exception, inst: |
|
178 |
err(_("unpacking file %s %s: %s") % (f, short(n), inst)) |
|
179 |
||
180 |
# verify parents |
|
181 |
(p1, p2) = fl.parents(n) |
|
182 |
if p1 not in nodes: |
|
183 |
err(_("file %s:%s unknown parent 1 %s") % |
|
184 |
(f, short(n), short(p1))) |
|
185 |
if p2 not in nodes: |
|
186 |
err(_("file %s:%s unknown parent 2 %s") % |
|
187 |
(f, short(n), short(p1))) |
|
188 |
nodes[n] = 1 |
|
189 |
||
3744
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
190 |
# check renames |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
191 |
try: |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
192 |
rp = fl.renamed(n) |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
193 |
if rp: |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
194 |
fl2 = repo.file(rp[0]) |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
195 |
rev = fl2.rev(rp[1]) |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
196 |
except KeyboardInterrupt: |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
197 |
repo.ui.warn(_("interrupted")) |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
198 |
raise |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
199 |
except Exception, inst: |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
200 |
err(_("checking rename on file %s %s: %s") % (f, short(n), inst)) |
d626fc9e3985
verify: add rename link checking
Matt Mackall <mpm@selenic.com>
parents:
3473
diff
changeset
|
201 |
|
2778 | 202 |
# cross-check |
203 |
for node in filenodes[f]: |
|
204 |
err(_("node %s in manifests not in %s") % (hex(node), f)) |
|
205 |
||
206 |
repo.ui.status(_("%d files, %d changesets, %d total revisions\n") % |
|
207 |
(files, changesets, revisions)) |
|
208 |
||
209 |
if warnings[0]: |
|
210 |
repo.ui.warn(_("%d warnings encountered!\n") % warnings[0]) |
|
211 |
if errors[0]: |
|
212 |
repo.ui.warn(_("%d integrity errors encountered!\n") % errors[0]) |
|
213 |
return 1 |
|
214 |