Mercurial > hg
comparison tests/simplestorerepo.py @ 37337:d257c5f2a940
tests: add test extension implementing custom filelog storage
In order to better support partial clones, we'll need alternate
repository storage mechanisms that aren't based on revlogs.
Today, the interface for repository storage isn't very well defined.
And there are various layering violations and assumptions made
throughout the code that storage is backed by revlogs.
In order to support alternate storage mechanisms, we'll need to
formally declare and adhere to interfaces for storage. This will
be a long, arduous process.
This commit creates an extension that implements non-revlog storage
for files. It defines a custom type that quacks like the existing
revlog/filelog API but isn't backed by a revlog. The backing storage
is - for simplicity reasons - a CBOR index and per-node files
representing fulltext data.
The localrepository class is modified so file(f) returns instances of
this class instead of filelog instances.
The purpose of this extension is to tease out what the actual filelog
interface is - based on running the test harness - so we can formalize
that interface and then implement a *real* alternate storage backend.
Using `run-tests.py --extra-config-opt` to run the test harness
with this extension enabled yields 83 failures out of 634 tests run.
The most common test failures are due to:
* Issues with `hg verify`
* LFS and largefiles (probably flags processing related)
* Narrow.
* Any test touching or inspecting individual filelog paths.
* help and error output that is confused by the presence of an
extension.
* `hg debug*` commands doing low-level, revlog-y things.
An 88% pass rate is pretty good for an initial implementation if you
ask me!
There is a bit of duplicate code in the new extension. That's by
design: a point of this code is to tease out dependencies on revlog.
That being said, there is opportunity to consolidate code by moving
things out of the revlog API. For example, DAG traversal operations
don't necessarily need to be implemented at the storage level. (Although
for performance reasons they probably do.) Once we have a more
well-defined interface, we could probably define the default
implementations in terms of the base interface, pull those in via
class inheritance, and have implementations override with faster
versions if they so choose. (Or something like that.) But for now,
the duplicate code should be acceptable.
Differential Revision: https://phab.mercurial-scm.org/D3029
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Wed, 04 Apr 2018 11:37:07 -0700 |
parents | |
children | cbc4425e81b5 |
comparison
equal
deleted
inserted
replaced
37336:5d10f41ddcc4 | 37337:d257c5f2a940 |
---|---|
1 # simplestorerepo.py - Extension that swaps in alternate repository storage. | |
2 # | |
3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com> | |
4 # | |
5 # This software may be used and distributed according to the terms of the | |
6 # GNU General Public License version 2 or any later version. | |
7 | |
8 from __future__ import absolute_import | |
9 | |
10 from mercurial.i18n import _ | |
11 from mercurial.node import ( | |
12 bin, | |
13 hex, | |
14 nullid, | |
15 nullrev, | |
16 ) | |
17 from mercurial.thirdparty import ( | |
18 cbor, | |
19 ) | |
20 from mercurial import ( | |
21 ancestor, | |
22 error, | |
23 filelog, | |
24 mdiff, | |
25 pycompat, | |
26 revlog, | |
27 ) | |
28 | |
29 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for | |
30 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should | |
31 # be specifying the version(s) of Mercurial they are tested with, or | |
32 # leave the attribute unspecified. | |
33 testedwith = 'ships-with-hg-core' | |
34 | |
def validatenode(node):
    """Raise ValueError if ``node`` is not a 20-byte binary node."""
    # Revision numbers are ints; catch accidental rev/node mix-ups first,
    # before len() would raise a confusing TypeError on an int.
    if isinstance(node, int):
        raise ValueError('expected node; got int')

    # Binary SHA-1 node identifiers are exactly 20 bytes.
    if len(node) != 20:
        raise ValueError('expected 20 byte node')
41 | |
def validaterev(rev):
    """Raise ValueError if ``rev`` is not an integer revision number."""
    if not isinstance(rev, int):
        raise ValueError('expected int')
45 | |
class filestorage(object):
    """Implements storage for a tracked path.

    Data is stored in the VFS in a directory corresponding to the tracked
    path.

    Index data is stored in an ``index`` file using CBOR.

    Fulltext data is stored in files having names of the node.

    Index entries are dicts with ``node``, ``p1``, ``p2``, ``linkrev`` and
    ``flags`` keys. The class quacks like the revlog/filelog API so it can
    be swapped in for filelog instances by the extension's ``reposetup``.
    """

    def __init__(self, svfs, path):
        self._svfs = svfs
        self._path = path

        # All data for this tracked path lives under ``data/<path>`` in
        # the store vfs; the CBOR index sits next to the fulltext files.
        self._storepath = b'/'.join([b'data', path])
        self._indexpath = b'/'.join([self._storepath, b'index'])

        indexdata = self._svfs.tryread(self._indexpath)
        if indexdata:
            indexdata = cbor.loads(indexdata)

        self._indexdata = indexdata or []
        self._indexbynode = {}
        self._indexbyrev = {}
        self.index = []
        self._refreshindex()

        # This is used by changegroup code :/
        self._generaldelta = True
        self.storedeltachains = False

        self.version = 1

    def _refreshindex(self):
        """Rebuild in-memory lookup structures from ``self._indexdata``.

        Populates ``_indexbynode``, ``_indexbyrev`` (including synthetic
        null entries) and the revlog-shaped ``index`` tuple list.
        """
        self._indexbynode.clear()
        self._indexbyrev.clear()
        self.index = []

        for i, entry in enumerate(self._indexdata):
            self._indexbynode[entry[b'node']] = entry
            self._indexbyrev[i] = entry

        # Synthetic entries for the null node/rev so lookups of nullid /
        # nullrev behave like they do for revlogs.
        self._indexbynode[nullid] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        self._indexbyrev[nullrev] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        for i, entry in enumerate(self._indexdata):
            p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))

            # start, length, rawsize, chainbase, linkrev, p1, p2, node
            self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
                               entry[b'node']))

        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def __len__(self):
        return len(self._indexdata)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """Iterate over revision numbers, optionally in reverse.

        Mirrors revlog.revs(): ``stop`` is inclusive, and iterating with
        ``start > stop`` walks backwards.
        """
        step = 1
        if stop is not None:
            if start > stop:
                step = -1

            stop += step
        else:
            stop = len(self)

        return range(start, stop, step)

    def parents(self, node):
        """Return the (p1, p2) parent nodes of ``node``."""
        validatenode(node)

        if node not in self._indexbynode:
            raise KeyError('unknown node')

        entry = self._indexbynode[node]

        return entry[b'p1'], entry[b'p2']

    def parentrevs(self, rev):
        p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
        return self.rev(p1), self.rev(p2)

    def rev(self, node):
        """Return the revision number for ``node`` (KeyError if unknown)."""
        validatenode(node)

        # Will raise KeyError.
        self._indexbynode[node]

        # O(n) scan; acceptable for a test-only storage backend.
        for rev, entry in self._indexbyrev.items():
            if entry[b'node'] == node:
                return rev

        raise error.ProgrammingError('this should not occur')

    def node(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'node']

    def lookup(self, node):
        """Resolve ``node`` (int rev, binary node, rev string or hex node)
        to a binary node, raising LookupError on failure."""
        if isinstance(node, int):
            return self.node(node)

        if len(node) == 20:
            try:
                self.rev(node)
                return node
            except LookupError:
                pass

        try:
            rev = int(node)
            if '%d' % rev != node:
                raise ValueError

            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError

            return self.node(rev)
        except (ValueError, OverflowError):
            pass

        if len(node) == 40:
            try:
                rawnode = bin(node)
                self.rev(rawnode)
                return rawnode
            except (TypeError, LookupError):
                pass

        raise LookupError(node, self._path, _('invalid lookup input'))

    def linkrev(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'linkrev']

    def flags(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'flags']

    def deltaparent(self, rev):
        # We don't store deltas; report p1 as the delta parent, which is
        # what the changegroup code expects for full snapshots.
        validaterev(rev)

        p1node = self.parents(self.node(rev))[0]
        return self.rev(p1node)

    def candelta(self, baserev, rev):
        validaterev(baserev)
        validaterev(rev)

        if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
                or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False

        return True

    def rawsize(self, rev):
        validaterev(rev)
        node = self.node(rev)
        return len(self.revision(node, raw=True))

    def _processflags(self, text, flags, operation, raw=False):
        """Apply registered flag processors to ``text``.

        Returns ``(text, validatehash)``; mirrors revlog's flag processing.
        """
        if flags == 0:
            return text, True

        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise revlog.RevlogError(message)

                processor = revlog._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Verify that ``text`` hashes to ``node`` given its parents."""
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != revlog.hash(text, p1, p2):
            raise error.RevlogError(_("integrity check failed on %s") %
                                    self._path)

    def revision(self, node, raw=False):
        """Return the fulltext for ``node``.

        With ``raw=True``, flag processors' raw transforms are applied and
        the stored bytes are returned without read transforms.
        """
        validatenode(node)

        if node == nullid:
            return b''

        # Raises KeyError for unknown nodes.
        self._indexbynode[node]

        rev = self.rev(node)
        flags = self.flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def read(self, node):
        """Return the fulltext with any copy metadata header stripped."""
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        start = revision.index(b'\1\n', 2)
        return revision[start + 2:]

    def renamed(self, node):
        """Return (copysource, copynode) if ``node`` is a copy, else False."""
        validatenode(node)

        # Copies are only recorded on revisions with a null p1.
        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
        m = filelog.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

    def cmp(self, node, text):
        """Return True if ``text`` differs from the revision at ``node``."""
        validatenode(node)

        t = text

        # Prepend an escaped metadata header so the hash comparison below
        # accounts for texts that would need metadata escaping on write.
        if text.startswith(b'\1\n'):
            t = b'\1\n\1\n' + text

        p1, p2 = self.parents(node)

        if revlog.hash(t, p1, p2) == node:
            return False

        if self.iscensored(self.rev(node)):
            return text != b''

        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True

    def size(self, rev):
        """Return the size of the revision's fulltext (sans copy metadata)."""
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        validaterev(rev)

        return self.flags(rev) & revlog.REVIDX_ISCENSORED

    def commonancestorsheads(self, a, b):
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    # Required by verify.
    def files(self):
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    # Required by verify.
    def checksize(self):
        return 0, 0

    def add(self, text, meta, transaction, linkrev, p1, p2):
        """Add a revision, packing copy metadata into the stored text."""
        if meta or text.startswith(b'\1\n'):
            text = filelog.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

    def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
                    flags=0):
        """Store a new revision and return its node."""
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or revlog.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        node = node or revlog.hash(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        path = b'/'.join([self._storepath, hex(node)])

        # NOTE(review): this writes the pre-transform ``text`` while
        # ``revision()`` applies read transforms to whatever it loads;
        # for flagged revisions, storing ``rawtext`` may be what is
        # intended — confirm against the flags-processing test failures
        # mentioned in the commit message.
        self._svfs.write(path, text)

        self._indexdata.append({
            b'node': node,
            b'p1': p1,
            b'p2': p2,
            b'linkrev': linkrev,
            b'flags': flags,
        })

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
        # Persist the index after every mutation; rebuilding the in-memory
        # structures keeps them in sync with what is on disk.
        self._refreshindex()
        self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """Apply a group of deltas, returning the list of nodes seen."""
        nodes = []

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)

            nodes.append(node)

            if node in self._indexbynode:
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            # Pass ``flags`` by keyword: positionally it would bind to the
            # ``node`` parameter of addrevision() and be treated as a node
            # (dropping the flags) whenever it is nonzero.
            self.addrevision(text, transaction, linkrev, p1, p2, flags=flags)

            if addrevisioncb:
                addrevisioncb(self, node)

        return nodes

    def revdiff(self, rev1, rev2):
        validaterev(rev1)
        validaterev(rev2)

        node1 = self.node(rev1)
        node2 = self.node(rev2)

        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))

    def headrevs(self):
        # Assume all revisions are heads by default.
        ishead = {rev: True for rev in self._indexbyrev}

        for rev, entry in self._indexbyrev.items():
            # Unset head flag for all seen parents.
            ishead[self.rev(entry[b'p1'])] = False
            ishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(ishead.items())
                if ishead]

    def heads(self, start=None, stop=None):
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):
        """Return (strippoint, brokenrevs) for stripping at ``minlink``."""
        # This is largely a copy of revlog.getstrippoint().
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """Remove all revisions with linkrev >= ``minlink``."""
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()
580 | |
def reposetup(ui, repo):
    """Rebind the repository class so file storage uses ``filestorage``.

    After this runs, ``repo.file(f)`` returns a ``filestorage`` instance
    instead of a revlog-backed filelog. Non-local repos are untouched.
    """
    if not repo.local():
        return

    class simplestorerepo(repo.__class__):
        def file(self, f):
            # Swap in the extension's custom storage for every tracked file.
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo