comparison tests/simplestorerepo.py @ 37337:d257c5f2a940

tests: add test extension implementing custom filelog storage

In order to better support partial clones, we'll need alternate repository
storage mechanisms that aren't based on revlogs.

Today, the interface for repository storage isn't very well defined, and
there are various layering violations and assumptions made throughout the
code that storage is backed by revlogs. In order to support alternate
storage mechanisms, we'll need to formally declare and adhere to interfaces
for storage. This will be a long, arduous process.

This commit creates an extension that implements non-revlog storage for
files. It defines a custom type that quacks like the existing revlog/filelog
API but isn't backed by a revlog. The backing storage is - for simplicity
reasons - a CBOR index and per-node files representing fulltext data. The
localrepository class is modified so file(f) returns instances of this class
instead of filelog instances.

The purpose of this extension is to tease out what the actual filelog
interface is - based on running the test harness - so we can formalize that
interface and then implement a *real* alternate storage backend.

Using `run-tests.py --extra-config-opt` to run the test harness with this
extension enabled yields 83 failures out of 634 tests run. The most common
test failures are due to:

* Issues with `hg verify`
* LFS and largefiles (probably flags processing related)
* Narrow
* Any test touching or inspecting individual filelog paths
* Help and error output that is confused by the presence of an extension
* `hg debug*` commands doing low-level, revlog-y things

An ~87% pass rate is pretty good for an initial implementation if you ask me!

There is a bit of duplicate code in the new extension. That's by design: a
point of this code is to tease out dependencies on revlog. That being said,
there is opportunity to consolidate code by moving things out of the revlog
API. For example, DAG traversal operations don't necessarily need to be
implemented at the storage level. (Although for performance reasons they
probably do.) Once we have a more well-defined interface, we could probably
define the default implementations in terms of the base interface, pull
those in via class inheritance, and have implementations override with
faster versions if they so choose. (Or something like that.) But for now,
the duplicate code should be acceptable.

Differential Revision: https://phab.mercurial-scm.org/D3029
author Gregory Szorc <gregory.szorc@gmail.com>
date Wed, 04 Apr 2018 11:37:07 -0700
parents
children cbc4425e81b5
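
To make the storage model described above concrete, the sketch below shows the
per-path layout the extension uses: an ``index`` file containing a CBOR-encoded
list of entries (node, p1, p2, linkrev, flags) plus one file per node holding
the fulltext. The helper is hypothetical and, for the sake of a self-contained
example, uses the standalone ``cbor`` package and plain file I/O; the extension
itself goes through ``mercurial.thirdparty.cbor`` and the repository's store vfs.

import binascii
import os

import cbor  # assumption: the PyPI ``cbor`` package (provides loads()/dumps())

def appendrevision(storedir, node, p1, p2, linkrev, flags, fulltext):
    # Load the existing CBOR index, if any.
    indexpath = os.path.join(storedir, 'index')
    if os.path.exists(indexpath):
        with open(indexpath, 'rb') as fh:
            entries = cbor.loads(fh.read())
    else:
        entries = []

    # One dict per revision; these are the keys filestorage tracks below.
    entries.append({
        b'node': node,
        b'p1': p1,
        b'p2': p2,
        b'linkrev': linkrev,
        b'flags': flags,
    })

    # The fulltext is stored verbatim in a file named after the hex node.
    hexnode = binascii.hexlify(node).decode('ascii')
    with open(os.path.join(storedir, hexnode), 'wb') as fh:
        fh.write(fulltext)

    with open(indexpath, 'wb') as fh:
        fh.write(cbor.dumps(entries))

Everything else in the ``filestorage`` class below - rev/node mapping, DAG
helpers, flag processing - is derived from this index when the class is
instantiated.
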
# simplestorerepo.py - Extension that swaps in alternate repository storage.
#
# Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from mercurial.i18n import _
from mercurial.node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from mercurial.thirdparty import (
    cbor,
)
from mercurial import (
    ancestor,
    error,
    filelog,
    mdiff,
    pycompat,
    revlog,
)

# Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
# extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
# be specifying the version(s) of Mercurial they are tested with, or
# leave the attribute unspecified.
testedwith = 'ships-with-hg-core'

def validatenode(node):
    if isinstance(node, int):
        raise ValueError('expected node; got int')

    if len(node) != 20:
        raise ValueError('expected 20 byte node')

def validaterev(rev):
    if not isinstance(rev, int):
        raise ValueError('expected int')

class filestorage(object):
    """Implements storage for a tracked path.

    Data is stored in the VFS in a directory corresponding to the tracked
    path.

    Index data is stored in an ``index`` file using CBOR.

    Fulltext data is stored in files having names of the node.
    """

    def __init__(self, svfs, path):
        self._svfs = svfs
        self._path = path

        self._storepath = b'/'.join([b'data', path])
        self._indexpath = b'/'.join([self._storepath, b'index'])

        indexdata = self._svfs.tryread(self._indexpath)
        if indexdata:
            indexdata = cbor.loads(indexdata)

        self._indexdata = indexdata or []
        self._indexbynode = {}
        self._indexbyrev = {}
        self.index = []
        self._refreshindex()

        # This is used by changegroup code :/
        self._generaldelta = True
        self.storedeltachains = False

        self.version = 1

    def _refreshindex(self):
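        # Rebuild the in-memory lookup tables (node -> entry, rev -> entry)
        # and the revlog-style index tuples from the raw entries in
        # self._indexdata.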
        self._indexbynode.clear()
        self._indexbyrev.clear()
        self.index = []

        for i, entry in enumerate(self._indexdata):
            self._indexbynode[entry[b'node']] = entry
            self._indexbyrev[i] = entry

        self._indexbynode[nullid] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        self._indexbyrev[nullrev] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        for i, entry in enumerate(self._indexdata):
            p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))

            # start, length, rawsize, chainbase, linkrev, p1, p2, node
            self.index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
                               entry[b'node']))

        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def __len__(self):
        return len(self._indexdata)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        step = 1
        if stop is not None:
            if start > stop:
                step = -1

            stop += step
        else:
            stop = len(self)

        return range(start, stop, step)

    def parents(self, node):
        validatenode(node)

        if node not in self._indexbynode:
            raise KeyError('unknown node')

        entry = self._indexbynode[node]

        return entry[b'p1'], entry[b'p2']

    def parentrevs(self, rev):
        p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
        return self.rev(p1), self.rev(p2)

    def rev(self, node):
        validatenode(node)

        # Will raise KeyError.
        self._indexbynode[node]

        for rev, entry in self._indexbyrev.items():
            if entry[b'node'] == node:
                return rev

        raise error.ProgrammingError('this should not occur')

    def node(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'node']

    def lookup(self, node):
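        # Resolve user-supplied identifiers to a binary node: an integer
        # revision, a 20-byte binary node, a decimal revision string, or a
        # 40-character hex node.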
        if isinstance(node, int):
            return self.node(node)

        if len(node) == 20:
            try:
                self.rev(node)
                return node
            except LookupError:
                pass

        try:
            rev = int(node)
            if '%d' % rev != node:
                raise ValueError

            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError

            return self.node(rev)
        except (ValueError, OverflowError):
            pass

        if len(node) == 40:
            try:
                rawnode = bin(node)
                self.rev(rawnode)
                return rawnode
            except (TypeError, LookupError):
                pass

        raise LookupError(node, self._path, _('invalid lookup input'))

    def linkrev(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'linkrev']

    def flags(self, rev):
        validaterev(rev)

        return self._indexbyrev[rev][b'flags']

    def deltaparent(self, rev):
        validaterev(rev)

        p1node = self.parents(self.node(rev))[0]
        return self.rev(p1node)

    def candelta(self, baserev, rev):
        validaterev(baserev)
        validaterev(rev)

        if ((self.flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False

        return True

    def rawsize(self, rev):
        validaterev(rev)
        node = self.node(rev)
        return len(self.revision(node, raw=True))

    def _processflags(self, text, flags, operation, raw=False):
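        # Largely a copy of revlog._processflags(): run the registered flag
        # processors over the (raw)text for the given operation and report
        # whether the result's hash should be validated.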
        if flags == 0:
            return text, True

        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise revlog.RevlogError(message)

                processor = revlog._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != revlog.hash(text, p1, p2):
            raise error.RevlogError(_("integrity check failed on %s") %
                                    self._path)

    def revision(self, node, raw=False):
        validatenode(node)

        if node == nullid:
            return b''

        self._indexbynode[node]

        rev = self.rev(node)
        flags = self.flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def read(self, node):
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        start = revision.index(b'\1\n', 2)
        return revision[start + 2:]

    def renamed(self, node):
        validatenode(node)

        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
        m = filelog.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

    def cmp(self, node, text):
        validatenode(node)

        t = text

        if text.startswith(b'\1\n'):
            t = b'\1\n\1\n' + text

        p1, p2 = self.parents(node)

        if revlog.hash(t, p1, p2) == node:
            return False

        if self.iscensored(self.rev(node)):
            return text != b''

        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True

    def size(self, rev):
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        validaterev(rev)

        return self.flags(rev) & revlog.REVIDX_ISCENSORED

    def commonancestorsheads(self, a, b):
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    # Required by verify.
    def files(self):
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    # Required by verify.
    def checksize(self):
        return 0, 0

    def add(self, text, meta, transaction, linkrev, p1, p2):
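        # Pack copy/rename metadata into the fulltext using the same scheme
        # as filelog before storing the revision.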
        if meta or text.startswith(b'\1\n'):
            text = filelog.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

    def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
                    flags=0):
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or revlog.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        node = node or revlog.hash(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        path = b'/'.join([self._storepath, hex(node)])

        # Store the raw (flag-processed) text; revision() reads it back as
        # rawtext and applies the read transforms.
        self._svfs.write(path, rawtext)

        self._indexdata.append({
            b'node': node,
            b'p1': p1,
            b'p2': p2,
            b'linkrev': linkrev,
            b'flags': flags,
        })

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
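        # Refresh derived in-memory state and persist the index as CBOR.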
        self._refreshindex()
        self._svfs.write(self._indexpath, cbor.dumps(self._indexdata))

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
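        # Apply a group of deltas (e.g. from a changegroup): each delta is
        # resolved against its base's fulltext and stored via addrevision().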
        nodes = []

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)

            nodes.append(node)

            if node in self._indexbynode:
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            self.addrevision(text, transaction, linkrev, p1, p2, flags=flags)

            if addrevisioncb:
                addrevisioncb(self, node)

        return nodes

    def revdiff(self, rev1, rev2):
        validaterev(rev1)
        validaterev(rev2)

        node1 = self.node(rev1)
        node2 = self.node(rev2)

        return mdiff.textdiff(self.revision(node1, raw=True),
                              self.revision(node2, raw=True))

    def headrevs(self):
        # Assume all revisions are heads by default.
        ishead = {rev: True for rev in self._indexbyrev}

        for rev, entry in self._indexbyrev.items():
            # Unset head flag for all seen parents.
            ishead[self.rev(entry[b'p1'])] = False
            ishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(ishead.items())
                if ishead]

    def heads(self, start=None, stop=None):
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):

        # This is largely a copy of revlog.getstrippoint().
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()

def reposetup(ui, repo):
    if not repo.local():
        return

    class simplestorerepo(repo.__class__):
        def file(self, f):
            return filestorage(self.svfs, f)

    repo.__class__ = simplestorerepo