comparison tests/test-revlog-raw.py @ 31748:985de02b5b9d

revlog: add a stronger test for raw processing There are some issues about revlog raw processing (flag processor). The test is relatively strong covering many cases. It will verify fixes.
author Jun Wu <quark@fb.com>
date Thu, 30 Mar 2017 20:48:57 -0700
parents
children 8a0c47982ade
comparison
equal deleted inserted replaced
31747:aff7b32b3c05 31748:985de02b5b9d
1 # test revlog interaction about raw data (flagprocessor)
2
3 from __future__ import absolute_import, print_function
4
5 import sys
6
7 from mercurial import (
8 encoding,
9 node,
10 revlog,
11 transaction,
12 vfs,
13 )
14
15 # TESTTMP is optional. This makes it convenient to run without run-tests.py
16 tvfs = vfs.vfs(encoding.environ.get('TESTTMP', b'/tmp'))
17
18 # Enable generaldelta otherwise revlog won't use delta as expected by the test
19 tvfs.options = {'generaldelta': True, 'revlogv1': True, 'revlogv1': True}
20
21 # The test wants to control whether to use delta explicitly, based on
22 # "storedeltachains".
23 revlog.revlog._isgooddelta = lambda self, d, textlen: self.storedeltachains
24
25 def abort(msg):
26 print('abort: %s' % msg)
27 # Return 0 so run-tests.py could compare the output.
28 sys.exit()
29
30 # Register a revlog processor for flag EXTSTORED.
31 #
32 # It simply prepends a fixed header, and replaces '1' to 'i'. So it has
33 # insertion and replacement, and may be interesting to test revlog's line-based
34 # deltas.
35 _extheader = b'E\n'
36
37 def readprocessor(self, rawtext):
38 # True: the returned text could be used to verify hash
39 text = rawtext[len(_extheader):].replace(b'i', b'1')
40 return text, True
41
42 def writeprocessor(self, text):
43 # False: the returned rawtext shouldn't be used to verify hash
44 rawtext = _extheader + text.replace(b'1', b'i')
45 return rawtext, False
46
47 def rawprocessor(self, rawtext):
48 # False: do not verify hash. Only the content returned by "readprocessor"
49 # can be used to verify hash.
50 return False
51
52 revlog.addflagprocessor(revlog.REVIDX_EXTSTORED,
53 (readprocessor, writeprocessor, rawprocessor))
54
55 # Utilities about reading and appending revlog
56
57 def newtransaction():
58 # A transaction is required to write revlogs
59 report = lambda msg: None
60 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal')
61
62 def newrevlog(name=b'_testrevlog.i', recreate=False):
63 if recreate:
64 tvfs.tryunlink(name)
65 rlog = revlog.revlog(tvfs, name)
66 return rlog
67
68 def appendrev(rlog, text, tr, isext=False, isdelta=True):
69 '''Append a revision. If isext is True, set the EXTSTORED flag so flag
70 processor will be used (and rawtext is different from text). If isdelta is
71 True, force the revision to be a delta, otherwise it's full text.
72 '''
73 nextrev = len(rlog)
74 p1 = rlog.node(nextrev - 1)
75 p2 = node.nullid
76 if isext:
77 flags = revlog.REVIDX_EXTSTORED
78 else:
79 flags = revlog.REVIDX_DEFAULT_FLAGS
80 # Change storedeltachains temporarily, to override revlog's delta decision
81 rlog.storedeltachains = isdelta
82 try:
83 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags)
84 return nextrev
85 except Exception as ex:
86 abort('rev %d: failed to append: %s' % (nextrev, ex))
87 finally:
88 # Restore storedeltachains. It is always True, see revlog.__init__
89 rlog.storedeltachains = True
90
91 def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True):
92 '''Copy revlog to destname using revlog.addgroup. Return the copied revlog.
93
94 This emulates push or pull. They use changegroup. Changegroup requires
95 repo to work. We don't have a repo, so a dummy changegroup is used.
96
97 If optimaldelta is True, use optimized delta parent, so the destination
98 revlog could probably reuse it. Otherwise it builds sub-optimal delta, and
99 the destination revlog needs more work to use it.
100
101 This exercises some revlog.addgroup (and revlog._addrevision(text=None))
102 code path, which is not covered by "appendrev" alone.
103 '''
104 class dummychangegroup(object):
105 @staticmethod
106 def deltachunk(pnode):
107 pnode = pnode or node.nullid
108 parentrev = rlog.rev(pnode)
109 r = parentrev + 1
110 if r >= len(rlog):
111 return {}
112 if optimaldelta:
113 deltaparent = parentrev
114 else:
115 # suboptimal deltaparent
116 deltaparent = min(0, parentrev)
117 return {'node': rlog.node(r), 'p1': pnode, 'p2': node.nullid,
118 'cs': rlog.node(rlog.linkrev(r)), 'flags': rlog.flags(r),
119 'deltabase': rlog.node(deltaparent),
120 'delta': rlog.revdiff(deltaparent, r)}
121
122 def linkmap(lnode):
123 return rlog.rev(lnode)
124
125 dlog = newrevlog(destname, recreate=True)
126 dlog.addgroup(dummychangegroup(), linkmap, tr)
127 return dlog
128
129 def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'):
130 '''Like addgroupcopy, but use the low level revlog._addrevision directly.
131
132 It exercises some code paths that are hard to reach easily otherwise.
133 '''
134 dlog = newrevlog(destname, recreate=True)
135 for r in rlog:
136 p1 = rlog.node(r - 1)
137 p2 = node.nullid
138 if r == 0:
139 text = rlog.revision(r, raw=True)
140 cachedelta = None
141 else:
142 # deltaparent is more interesting if it has the EXTSTORED flag.
143 deltaparent = max([0] + [p for p in range(r - 2) if rlog.flags(p)])
144 text = None
145 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r))
146 flags = rlog.flags(r)
147 ifh = dlog.opener(dlog.indexfile, 'a+')
148 dfh = None
149 if not dlog._inline:
150 dfh = dlog.opener(dlog.datafile, 'a+')
151 dlog._addrevision(rlog.node(r), text, tr, r, p1, p2, flags, cachedelta,
152 ifh, dfh)
153 return dlog
154
155 # Utilities to generate revisions for testing
156
157 def genbits(n):
158 '''Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n).
159 i.e. the generated numbers have a width of n bits.
160
161 The combination of two adjacent numbers will cover all possible cases.
162 That is to say, given any x, y where both x, and y are in range(2 ** n),
163 there is an x followed immediately by y in the generated sequence.
164 '''
165 m = 2 ** n
166
167 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code
168 gray = lambda x: x ^ (x >> 1)
169
170 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for
171 # the next unused gray code where higher n bits equal to X.
172
173 # For gray codes whose higher bits are X, a[X] of them have been used.
174 a = [0] * m
175
176 # Iterate from 0.
177 x = 0
178 yield x
179 for i in range(m * m):
180 y = gray(a[x] + x * m) & (m - 1)
181 a[x] += 1
182 x = y
183 yield x
184
185 def gentext(rev):
186 '''Given a revision number, generate dummy text'''
187 return b''.join(b'%d\n' % j for j in range(-1, rev % 5))
188
189 def writecases(rlog, tr):
190 '''Write some revisions interested to the test.
191
192 The test is interested in 3 properties of a revision:
193
194 - Is it a delta or a full text? (isdelta)
195 This is to catch some delta application issues.
196 - Does it have a flag of EXTSTORED? (isext)
197 This is to catch some flag processor issues. Especially when
198 interacted with revlog deltas.
199 - Is its text empty? (isempty)
200 This is less important. It is intended to try to catch some careless
201 checks like "if text" instead of "if text is None". Note: if flag
202 processor is involved, raw text may be not empty.
203
204 Write 65 revisions. So that all combinations of the above flags for
205 adjacent revisions are covered. That is to say,
206
207 len(set(
208 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty)
209 for r in range(len(rlog) - 1)
210 )) is 64.
211
212 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties
213 mentioned above.
214
215 Return expected [(text, rawtext)].
216 '''
217 result = []
218 for i, x in enumerate(genbits(3)):
219 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4)
220 if isempty:
221 text = b''
222 else:
223 text = gentext(i)
224 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta)
225
226 # Verify text, rawtext, and rawsize
227 if isext:
228 rawtext = writeprocessor(None, text)[0]
229 else:
230 rawtext = text
231 if rlog.rawsize(rev) != len(rawtext):
232 abort('rev %d: wrong rawsize' % rev)
233 if rlog.revision(rev, raw=False) != text:
234 abort('rev %d: wrong text' % rev)
235 if rlog.revision(rev, raw=True) != rawtext:
236 abort('rev %d: wrong rawtext' % rev)
237 result.append((text, rawtext))
238
239 # Verify flags like isdelta, isext work as expected
240 if bool(rlog.deltaparent(rev) > -1) != isdelta:
241 abort('rev %d: isdelta is ineffective' % rev)
242 if bool(rlog.flags(rev)) != isext:
243 abort('rev %d: isext is ineffective' % rev)
244 return result
245
246 # Main test and checking
247
248 def checkrevlog(rlog, expected):
249 '''Check if revlog has expected contents. expected is [(text, rawtext)]'''
250 # Test using different access orders. This could expose some issues
251 # depending on revlog caching (see revlog._cache).
252 for r0 in range(len(rlog) - 1):
253 r1 = r0 + 1
254 for revorder in [[r0, r1], [r1, r0]]:
255 for raworder in [[True], [False], [True, False], [False, True]]:
256 nlog = newrevlog()
257 for rev in revorder:
258 for raw in raworder:
259 t = nlog.revision(rev, raw=raw)
260 if t != expected[rev][int(raw)]:
261 abort('rev %d: corrupted %stext'
262 % (rev, raw and 'raw' or ''))
263
264 def maintest():
265 expected = rl = None
266 with newtransaction() as tr:
267 rl = newrevlog(recreate=True)
268 expected = writecases(rl, tr)
269 checkrevlog(rl, expected)
270 print('local test passed')
271 # Copy via revlog.addgroup
272 rl1 = addgroupcopy(rl, tr)
273 checkrevlog(rl1, expected)
274 rl2 = addgroupcopy(rl, tr, optimaldelta=False)
275 checkrevlog(rl2, expected)
276 print('addgroupcopy test passed')
277 # Copy via revlog.clone
278 rl3 = newrevlog(name='_destrevlog3.i', recreate=True)
279 rl.clone(tr, rl3)
280 checkrevlog(rl3, expected)
281 print('clone test passed')
282 # Copy via low-level revlog._addrevision
283 rl4 = lowlevelcopy(rl, tr)
284 checkrevlog(rl4, expected)
285 print('lowlevelcopy test passed')
286
287 try:
288 maintest()
289 except Exception as ex:
290 abort('crashed: %s' % ex)