Mercurial > hg
comparison tests/test-revlog-raw.py @ 31748:985de02b5b9d
revlog: add a stronger test for raw processing
There are some issues about revlog raw processing (flag processor). The test
is relatively strong covering many cases. It will verify fixes.
author | Jun Wu <quark@fb.com> |
---|---|
date | Thu, 30 Mar 2017 20:48:57 -0700 |
parents | |
children | 8a0c47982ade |
comparison
equal
deleted
inserted
replaced
31747:aff7b32b3c05 | 31748:985de02b5b9d |
---|---|
1 # test revlog interaction about raw data (flagprocessor) | |
2 | |
3 from __future__ import absolute_import, print_function | |
4 | |
5 import sys | |
6 | |
7 from mercurial import ( | |
8 encoding, | |
9 node, | |
10 revlog, | |
11 transaction, | |
12 vfs, | |
13 ) | |
14 | |
15 # TESTTMP is optional. This makes it convenient to run without run-tests.py | |
16 tvfs = vfs.vfs(encoding.environ.get('TESTTMP', b'/tmp')) | |
17 | |
18 # Enable generaldelta otherwise revlog won't use delta as expected by the test | |
19 tvfs.options = {'generaldelta': True, 'revlogv1': True, 'revlogv1': True} | |
20 | |
21 # The test wants to control whether to use delta explicitly, based on | |
22 # "storedeltachains". | |
23 revlog.revlog._isgooddelta = lambda self, d, textlen: self.storedeltachains | |
24 | |
25 def abort(msg): | |
26 print('abort: %s' % msg) | |
27 # Return 0 so run-tests.py could compare the output. | |
28 sys.exit() | |
29 | |
30 # Register a revlog processor for flag EXTSTORED. | |
31 # | |
32 # It simply prepends a fixed header, and replaces '1' to 'i'. So it has | |
33 # insertion and replacement, and may be interesting to test revlog's line-based | |
34 # deltas. | |
35 _extheader = b'E\n' | |
36 | |
37 def readprocessor(self, rawtext): | |
38 # True: the returned text could be used to verify hash | |
39 text = rawtext[len(_extheader):].replace(b'i', b'1') | |
40 return text, True | |
41 | |
42 def writeprocessor(self, text): | |
43 # False: the returned rawtext shouldn't be used to verify hash | |
44 rawtext = _extheader + text.replace(b'1', b'i') | |
45 return rawtext, False | |
46 | |
47 def rawprocessor(self, rawtext): | |
48 # False: do not verify hash. Only the content returned by "readprocessor" | |
49 # can be used to verify hash. | |
50 return False | |
51 | |
52 revlog.addflagprocessor(revlog.REVIDX_EXTSTORED, | |
53 (readprocessor, writeprocessor, rawprocessor)) | |
54 | |
55 # Utilities about reading and appending revlog | |
56 | |
57 def newtransaction(): | |
58 # A transaction is required to write revlogs | |
59 report = lambda msg: None | |
60 return transaction.transaction(report, tvfs, {'plain': tvfs}, b'journal') | |
61 | |
62 def newrevlog(name=b'_testrevlog.i', recreate=False): | |
63 if recreate: | |
64 tvfs.tryunlink(name) | |
65 rlog = revlog.revlog(tvfs, name) | |
66 return rlog | |
67 | |
68 def appendrev(rlog, text, tr, isext=False, isdelta=True): | |
69 '''Append a revision. If isext is True, set the EXTSTORED flag so flag | |
70 processor will be used (and rawtext is different from text). If isdelta is | |
71 True, force the revision to be a delta, otherwise it's full text. | |
72 ''' | |
73 nextrev = len(rlog) | |
74 p1 = rlog.node(nextrev - 1) | |
75 p2 = node.nullid | |
76 if isext: | |
77 flags = revlog.REVIDX_EXTSTORED | |
78 else: | |
79 flags = revlog.REVIDX_DEFAULT_FLAGS | |
80 # Change storedeltachains temporarily, to override revlog's delta decision | |
81 rlog.storedeltachains = isdelta | |
82 try: | |
83 rlog.addrevision(text, tr, nextrev, p1, p2, flags=flags) | |
84 return nextrev | |
85 except Exception as ex: | |
86 abort('rev %d: failed to append: %s' % (nextrev, ex)) | |
87 finally: | |
88 # Restore storedeltachains. It is always True, see revlog.__init__ | |
89 rlog.storedeltachains = True | |
90 | |
91 def addgroupcopy(rlog, tr, destname=b'_destrevlog.i', optimaldelta=True): | |
92 '''Copy revlog to destname using revlog.addgroup. Return the copied revlog. | |
93 | |
94 This emulates push or pull. They use changegroup. Changegroup requires | |
95 repo to work. We don't have a repo, so a dummy changegroup is used. | |
96 | |
97 If optimaldelta is True, use optimized delta parent, so the destination | |
98 revlog could probably reuse it. Otherwise it builds sub-optimal delta, and | |
99 the destination revlog needs more work to use it. | |
100 | |
101 This exercises some revlog.addgroup (and revlog._addrevision(text=None)) | |
102 code path, which is not covered by "appendrev" alone. | |
103 ''' | |
104 class dummychangegroup(object): | |
105 @staticmethod | |
106 def deltachunk(pnode): | |
107 pnode = pnode or node.nullid | |
108 parentrev = rlog.rev(pnode) | |
109 r = parentrev + 1 | |
110 if r >= len(rlog): | |
111 return {} | |
112 if optimaldelta: | |
113 deltaparent = parentrev | |
114 else: | |
115 # suboptimal deltaparent | |
116 deltaparent = min(0, parentrev) | |
117 return {'node': rlog.node(r), 'p1': pnode, 'p2': node.nullid, | |
118 'cs': rlog.node(rlog.linkrev(r)), 'flags': rlog.flags(r), | |
119 'deltabase': rlog.node(deltaparent), | |
120 'delta': rlog.revdiff(deltaparent, r)} | |
121 | |
122 def linkmap(lnode): | |
123 return rlog.rev(lnode) | |
124 | |
125 dlog = newrevlog(destname, recreate=True) | |
126 dlog.addgroup(dummychangegroup(), linkmap, tr) | |
127 return dlog | |
128 | |
129 def lowlevelcopy(rlog, tr, destname=b'_destrevlog.i'): | |
130 '''Like addgroupcopy, but use the low level revlog._addrevision directly. | |
131 | |
132 It exercises some code paths that are hard to reach easily otherwise. | |
133 ''' | |
134 dlog = newrevlog(destname, recreate=True) | |
135 for r in rlog: | |
136 p1 = rlog.node(r - 1) | |
137 p2 = node.nullid | |
138 if r == 0: | |
139 text = rlog.revision(r, raw=True) | |
140 cachedelta = None | |
141 else: | |
142 # deltaparent is more interesting if it has the EXTSTORED flag. | |
143 deltaparent = max([0] + [p for p in range(r - 2) if rlog.flags(p)]) | |
144 text = None | |
145 cachedelta = (deltaparent, rlog.revdiff(deltaparent, r)) | |
146 flags = rlog.flags(r) | |
147 ifh = dlog.opener(dlog.indexfile, 'a+') | |
148 dfh = None | |
149 if not dlog._inline: | |
150 dfh = dlog.opener(dlog.datafile, 'a+') | |
151 dlog._addrevision(rlog.node(r), text, tr, r, p1, p2, flags, cachedelta, | |
152 ifh, dfh) | |
153 return dlog | |
154 | |
155 # Utilities to generate revisions for testing | |
156 | |
157 def genbits(n): | |
158 '''Given a number n, generate (2 ** (n * 2) + 1) numbers in range(2 ** n). | |
159 i.e. the generated numbers have a width of n bits. | |
160 | |
161 The combination of two adjacent numbers will cover all possible cases. | |
162 That is to say, given any x, y where both x, and y are in range(2 ** n), | |
163 there is an x followed immediately by y in the generated sequence. | |
164 ''' | |
165 m = 2 ** n | |
166 | |
167 # Gray Code. See https://en.wikipedia.org/wiki/Gray_code | |
168 gray = lambda x: x ^ (x >> 1) | |
169 | |
170 # Generate (n * 2) bit gray code, yield lower n bits as X, and look for | |
171 # the next unused gray code where higher n bits equal to X. | |
172 | |
173 # For gray codes whose higher bits are X, a[X] of them have been used. | |
174 a = [0] * m | |
175 | |
176 # Iterate from 0. | |
177 x = 0 | |
178 yield x | |
179 for i in range(m * m): | |
180 y = gray(a[x] + x * m) & (m - 1) | |
181 a[x] += 1 | |
182 x = y | |
183 yield x | |
184 | |
185 def gentext(rev): | |
186 '''Given a revision number, generate dummy text''' | |
187 return b''.join(b'%d\n' % j for j in range(-1, rev % 5)) | |
188 | |
189 def writecases(rlog, tr): | |
190 '''Write some revisions interested to the test. | |
191 | |
192 The test is interested in 3 properties of a revision: | |
193 | |
194 - Is it a delta or a full text? (isdelta) | |
195 This is to catch some delta application issues. | |
196 - Does it have a flag of EXTSTORED? (isext) | |
197 This is to catch some flag processor issues. Especially when | |
198 interacted with revlog deltas. | |
199 - Is its text empty? (isempty) | |
200 This is less important. It is intended to try to catch some careless | |
201 checks like "if text" instead of "if text is None". Note: if flag | |
202 processor is involved, raw text may be not empty. | |
203 | |
204 Write 65 revisions. So that all combinations of the above flags for | |
205 adjacent revisions are covered. That is to say, | |
206 | |
207 len(set( | |
208 (r.delta, r.ext, r.empty, (r+1).delta, (r+1).ext, (r+1).empty) | |
209 for r in range(len(rlog) - 1) | |
210 )) is 64. | |
211 | |
212 Where "r.delta", "r.ext", and "r.empty" are booleans matching properties | |
213 mentioned above. | |
214 | |
215 Return expected [(text, rawtext)]. | |
216 ''' | |
217 result = [] | |
218 for i, x in enumerate(genbits(3)): | |
219 isdelta, isext, isempty = bool(x & 1), bool(x & 2), bool(x & 4) | |
220 if isempty: | |
221 text = b'' | |
222 else: | |
223 text = gentext(i) | |
224 rev = appendrev(rlog, text, tr, isext=isext, isdelta=isdelta) | |
225 | |
226 # Verify text, rawtext, and rawsize | |
227 if isext: | |
228 rawtext = writeprocessor(None, text)[0] | |
229 else: | |
230 rawtext = text | |
231 if rlog.rawsize(rev) != len(rawtext): | |
232 abort('rev %d: wrong rawsize' % rev) | |
233 if rlog.revision(rev, raw=False) != text: | |
234 abort('rev %d: wrong text' % rev) | |
235 if rlog.revision(rev, raw=True) != rawtext: | |
236 abort('rev %d: wrong rawtext' % rev) | |
237 result.append((text, rawtext)) | |
238 | |
239 # Verify flags like isdelta, isext work as expected | |
240 if bool(rlog.deltaparent(rev) > -1) != isdelta: | |
241 abort('rev %d: isdelta is ineffective' % rev) | |
242 if bool(rlog.flags(rev)) != isext: | |
243 abort('rev %d: isext is ineffective' % rev) | |
244 return result | |
245 | |
246 # Main test and checking | |
247 | |
248 def checkrevlog(rlog, expected): | |
249 '''Check if revlog has expected contents. expected is [(text, rawtext)]''' | |
250 # Test using different access orders. This could expose some issues | |
251 # depending on revlog caching (see revlog._cache). | |
252 for r0 in range(len(rlog) - 1): | |
253 r1 = r0 + 1 | |
254 for revorder in [[r0, r1], [r1, r0]]: | |
255 for raworder in [[True], [False], [True, False], [False, True]]: | |
256 nlog = newrevlog() | |
257 for rev in revorder: | |
258 for raw in raworder: | |
259 t = nlog.revision(rev, raw=raw) | |
260 if t != expected[rev][int(raw)]: | |
261 abort('rev %d: corrupted %stext' | |
262 % (rev, raw and 'raw' or '')) | |
263 | |
264 def maintest(): | |
265 expected = rl = None | |
266 with newtransaction() as tr: | |
267 rl = newrevlog(recreate=True) | |
268 expected = writecases(rl, tr) | |
269 checkrevlog(rl, expected) | |
270 print('local test passed') | |
271 # Copy via revlog.addgroup | |
272 rl1 = addgroupcopy(rl, tr) | |
273 checkrevlog(rl1, expected) | |
274 rl2 = addgroupcopy(rl, tr, optimaldelta=False) | |
275 checkrevlog(rl2, expected) | |
276 print('addgroupcopy test passed') | |
277 # Copy via revlog.clone | |
278 rl3 = newrevlog(name='_destrevlog3.i', recreate=True) | |
279 rl.clone(tr, rl3) | |
280 checkrevlog(rl3, expected) | |
281 print('clone test passed') | |
282 # Copy via low-level revlog._addrevision | |
283 rl4 = lowlevelcopy(rl, tr) | |
284 checkrevlog(rl4, expected) | |
285 print('lowlevelcopy test passed') | |
286 | |
287 try: | |
288 maintest() | |
289 except Exception as ex: | |
290 abort('crashed: %s' % ex) |