Mercurial > hg
comparison mercurial/exchangev2.py @ 40393:229d23cdb203
exchangev2: support fetching shallow files history
This commit teaches the exchangev2 client code to handle fetching shallow
files data.
Only shallow fetching of files data is supported: shallow fetching of
changeset and manifest data is explicitly not yet supported.
Previously, we would fetch file revisions for changesets that were received
by the current pull operation. In the new model, we calculate the set of
"relevant" changesets given the pull depth and only fetch files data for
those changesets.
We also teach the "filesdata" command invocation to vary parameters as needed.
The implementation here is far from complete or optimal. Subsequent pulls will
end up re-fetching a lot of files data. But the application of this data should
mostly be a no-op on the client, so it isn't a big deal.
Depending on the order file revisions are fetched in, revisions could get
inserted with the wrong revision number relationships. I think the best way
to deal with this is to remove revision numbers from storage and to either
dynamically derive them (by reconstructing a DAG from nodes/parents) or remove
revision numbers from the file storage interface completely.
A missing API that we'll likely want to write pretty soon is "ensure files
for revision(s) are present." We can kind of cajole exchangev2.pull() to do
this. But it isn't very efficient. For example, in simple cases like
widening the store to obtain data for a single revision, it is probably
more efficient to walk the manifest and find exactly which file revisions
are missing and to make explicit requests for just their data. In more
advanced cases, asking the server for all files data may be more efficient,
even though it requires sending data the client already has. There is tons
of room for future experimentation here. And TBH I'm not sure what the
final state will be.
Anyway, this commit gets us pretty close to being able to have shallow
and narrow checkouts with exchangev2/sqlite storage. Close enough that a
minimal extension should be able to fill in the gaps until the code
in core stabilizes and there is a user-facing way to trigger the
narrow/shallow bits from `hg clone` without also implying use of the
narrow extension...
Differential Revision: https://phab.mercurial-scm.org/D5169
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Fri, 19 Oct 2018 12:30:49 +0200 |
parents | 00a4cd368e3f |
children | afa884015e66 |
comparison
equal
deleted
inserted
replaced
40392:595641bd8404 | 40393:229d23cdb203 |
---|---|
20 error, | 20 error, |
21 mdiff, | 21 mdiff, |
22 narrowspec, | 22 narrowspec, |
23 phases, | 23 phases, |
24 pycompat, | 24 pycompat, |
25 repository, | |
25 setdiscovery, | 26 setdiscovery, |
26 ) | 27 ) |
27 | 28 |
28 def pull(pullop): | 29 def pull(pullop): |
29 """Pull using wire protocol version 2.""" | 30 """Pull using wire protocol version 2.""" |
89 remote.url(), pullop.gettransaction, | 90 remote.url(), pullop.gettransaction, |
90 explicit=pullop.explicitbookmarks) | 91 explicit=pullop.explicitbookmarks) |
91 | 92 |
92 manres = _fetchmanifests(repo, tr, remote, csetres['manifestnodes']) | 93 manres = _fetchmanifests(repo, tr, remote, csetres['manifestnodes']) |
93 | 94 |
95 # We don't properly support shallow changeset and manifest yet. So we apply | |
96 # depth limiting locally. | |
97 if pullop.depth: | |
98 relevantcsetnodes = set() | |
99 clnode = repo.changelog.node | |
100 | |
101 for rev in repo.revs(b'ancestors(%ln, %d)', | |
102 pullheads, pullop.depth - 1): | |
103 relevantcsetnodes.add(clnode(rev)) | |
104 | |
105 csetrelevantfilter = lambda n: n in relevantcsetnodes | |
106 | |
107 else: | |
108 csetrelevantfilter = lambda n: True | |
109 | |
94 # If obtaining the raw store files, we need to scan the full repo to | 110 # If obtaining the raw store files, we need to scan the full repo to |
95 # derive all the changesets, manifests, and linkrevs. | 111 # derive all the changesets, manifests, and linkrevs. |
96 if usingrawchangelogandmanifest: | 112 if usingrawchangelogandmanifest: |
97 csetsforfiles = [] | 113 csetsforfiles = [] |
98 mnodesforfiles = [] | 114 mnodesforfiles = [] |
99 manifestlinkrevs = {} | 115 manifestlinkrevs = {} |
100 | 116 |
101 for rev in repo: | 117 for rev in repo: |
102 ctx = repo[rev] | 118 ctx = repo[rev] |
119 node = ctx.node() | |
120 | |
121 if not csetrelevantfilter(node): | |
122 continue | |
123 | |
103 mnode = ctx.manifestnode() | 124 mnode = ctx.manifestnode() |
104 | 125 |
105 csetsforfiles.append(ctx.node()) | 126 csetsforfiles.append(node) |
106 mnodesforfiles.append(mnode) | 127 mnodesforfiles.append(mnode) |
107 manifestlinkrevs[mnode] = rev | 128 manifestlinkrevs[mnode] = rev |
108 | 129 |
109 else: | 130 else: |
110 csetsforfiles = csetres['added'] | 131 csetsforfiles = [n for n in csetres['added'] if csetrelevantfilter(n)] |
111 mnodesforfiles = manres['added'] | 132 mnodesforfiles = manres['added'] |
112 manifestlinkrevs = manres['linkrevs'] | 133 manifestlinkrevs = manres['linkrevs'] |
113 | 134 |
114 # Find all file nodes referenced by added manifests and fetch those | 135 # Find all file nodes referenced by added manifests and fetch those |
115 # revisions. | 136 # revisions. |
116 fnodes = _derivefilesfrommanifests(repo, narrowmatcher, mnodesforfiles) | 137 fnodes = _derivefilesfrommanifests(repo, narrowmatcher, mnodesforfiles) |
117 _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csetsforfiles, | 138 _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csetsforfiles, |
118 manifestlinkrevs) | 139 manifestlinkrevs, shallow=bool(pullop.depth)) |
119 | 140 |
120 def _checkuserawstorefiledata(pullop): | 141 def _checkuserawstorefiledata(pullop): |
121 """Check whether we should use rawstorefiledata command to retrieve data.""" | 142 """Check whether we should use rawstorefiledata command to retrieve data.""" |
122 | 143 |
123 repo = pullop.repo | 144 repo = pullop.repo |
562 iterrevisions(objs, progress), | 583 iterrevisions(objs, progress), |
563 locallinkrevs[path].__getitem__, | 584 locallinkrevs[path].__getitem__, |
564 weakref.proxy(tr)) | 585 weakref.proxy(tr)) |
565 | 586 |
566 def _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csets, | 587 def _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csets, |
567 manlinkrevs): | 588 manlinkrevs, shallow=False): |
568 """Fetch file data from explicit changeset revisions.""" | 589 """Fetch file data from explicit changeset revisions.""" |
569 | 590 |
570 def iterrevisions(objs, remaining, progress): | 591 def iterrevisions(objs, remaining, progress): |
571 while remaining: | 592 while remaining: |
572 filerevision = next(objs) | 593 filerevision = next(objs) |
586 revision = extrafields[b'revision'] | 607 revision = extrafields[b'revision'] |
587 delta = mdiff.trivialdiffheader(len(revision)) + revision | 608 delta = mdiff.trivialdiffheader(len(revision)) + revision |
588 else: | 609 else: |
589 continue | 610 continue |
590 | 611 |
612 if b'linknode' in filerevision: | |
613 linknode = filerevision[b'linknode'] | |
614 else: | |
615 linknode = node | |
616 | |
591 yield ( | 617 yield ( |
592 node, | 618 node, |
593 filerevision[b'parents'][0], | 619 filerevision[b'parents'][0], |
594 filerevision[b'parents'][1], | 620 filerevision[b'parents'][1], |
595 node, | 621 linknode, |
596 basenode, | 622 basenode, |
597 delta, | 623 delta, |
598 # Flags not yet supported. | 624 # Flags not yet supported. |
599 0, | 625 0, |
600 ) | 626 ) |
606 _('files'), unit=_('chunks'), | 632 _('files'), unit=_('chunks'), |
607 total=sum(len(v) for v in fnodes.itervalues())) | 633 total=sum(len(v) for v in fnodes.itervalues())) |
608 | 634 |
609 commandmeta = remote.apidescriptor[b'commands'][b'filesdata'] | 635 commandmeta = remote.apidescriptor[b'commands'][b'filesdata'] |
610 batchsize = commandmeta.get(b'recommendedbatchsize', 50000) | 636 batchsize = commandmeta.get(b'recommendedbatchsize', 50000) |
637 | |
638 shallowfiles = repository.REPO_FEATURE_SHALLOW_FILE_STORAGE in repo.features | |
639 fields = {b'parents', b'revision'} | |
640 clrev = repo.changelog.rev | |
641 | |
642 # There are no guarantees that we'll have ancestor revisions if | |
643 # a) this repo has shallow file storage b) shallow data fetching is enabled. | |
644 # Force remote to not delta against possibly unknown revisions when these | |
645 # conditions hold. | |
646 haveparents = not (shallowfiles or shallow) | |
647 | |
648 # Similarly, we may not have calculated linkrevs for all incoming file | |
649 # revisions. Ask the remote to do work for us in this case. | |
650 if not haveparents: | |
651 fields.add(b'linknode') | |
611 | 652 |
612 for i in pycompat.xrange(0, len(csets), batchsize): | 653 for i in pycompat.xrange(0, len(csets), batchsize): |
613 batch = [x for x in csets[i:i + batchsize]] | 654 batch = [x for x in csets[i:i + batchsize]] |
614 if not batch: | 655 if not batch: |
615 continue | 656 continue |
618 args = { | 659 args = { |
619 b'revisions': [{ | 660 b'revisions': [{ |
620 b'type': b'changesetexplicit', | 661 b'type': b'changesetexplicit', |
621 b'nodes': batch, | 662 b'nodes': batch, |
622 }], | 663 }], |
623 b'fields': {b'parents', b'revision'}, | 664 b'fields': fields, |
624 b'haveparents': True, | 665 b'haveparents': haveparents, |
625 } | 666 } |
626 | 667 |
627 if pathfilter: | 668 if pathfilter: |
628 args[b'pathfilter'] = pathfilter | 669 args[b'pathfilter'] = pathfilter |
629 | 670 |
641 | 682 |
642 linkrevs = { | 683 linkrevs = { |
643 fnode: manlinkrevs[mnode] | 684 fnode: manlinkrevs[mnode] |
644 for fnode, mnode in fnodes[path].iteritems()} | 685 for fnode, mnode in fnodes[path].iteritems()} |
645 | 686 |
687 def getlinkrev(node): | |
688 if node in linkrevs: | |
689 return linkrevs[node] | |
690 else: | |
691 return clrev(node) | |
692 | |
646 store.addgroup(iterrevisions(objs, header[b'totalitems'], | 693 store.addgroup(iterrevisions(objs, header[b'totalitems'], |
647 progress), | 694 progress), |
648 linkrevs.__getitem__, | 695 getlinkrev, |
649 weakref.proxy(tr)) | 696 weakref.proxy(tr), |
697 maybemissingparents=shallow) |