diff mercurial/exchangev2.py @ 40393:229d23cdb203

exchangev2: support fetching shallow files history This commit teaches the exchangev2 client code to handle fetching shallow files data. Only shallow fetching of files data is supported: shallow fetching of changeset and manifest data is explicitly not yet supported. Previously, we would fetch file revisions for changesets that were received by the current pull operation. In the new model, we calculate the set of "relevant" changesets given the pull depth and only fetch files data for those changesets. We also teach the "filesdata" command invocation to vary parameters as needed. The implementation here is far from complete or optimal. Subsequent pulls will end up re-fetching a lot of files data. But the application of this data should mostly be a no-op on the client, so it isn't a big deal. Depending on the order file revisions are fetched in, revisions could get inserted with the wrong revision number relationships. I think the best way to deal with this is to remove revision numbers from storage and to either dynamically derive them (by reconstructing a DAG from nodes/parents) or remove revision numbers from the file storage interface completely. A missing API that we'll likely want to write pretty soon is "ensure files for revision(s) are present." We can kind of cajole exchangev2.pull() to do this. But it isn't very efficient. For example, in simple cases like widening the store to obtain data for a single revision, it is probably more efficient to walk the manifest and find exactly which file revisions are missing and to make explicit requests for just their data. In more advanced cases, asking the server for all files data may be more efficient, even though it requires sending data the client already has. There is tons of room for future experimentation here. And TBH I'm not sure what the final state will be. Anyway, this commit gets us pretty close to being able to have shallow and narrow checkouts with exchangev2/sqlite storage. 
Close enough that a minimal extension should be able to fill in the gaps until the code in core stabilizes and there is a user-facing way to trigger the narrow/shallow bits from `hg clone` without also implying use of the narrow extension... Differential Revision: https://phab.mercurial-scm.org/D5169
author Gregory Szorc <gregory.szorc@gmail.com>
date Fri, 19 Oct 2018 12:30:49 +0200
parents 00a4cd368e3f
children afa884015e66
line wrap: on
line diff
--- a/mercurial/exchangev2.py	Wed Oct 17 17:32:15 2018 +0200
+++ b/mercurial/exchangev2.py	Fri Oct 19 12:30:49 2018 +0200
@@ -22,6 +22,7 @@
     narrowspec,
     phases,
     pycompat,
+    repository,
     setdiscovery,
 )
 
@@ -91,6 +92,21 @@
 
     manres = _fetchmanifests(repo, tr, remote, csetres['manifestnodes'])
 
+    # We don't properly support shallow changeset and manifest yet. So we apply
+    # depth limiting locally.
+    if pullop.depth:
+        relevantcsetnodes = set()
+        clnode = repo.changelog.node
+
+        for rev in repo.revs(b'ancestors(%ln, %d)',
+                             pullheads, pullop.depth - 1):
+            relevantcsetnodes.add(clnode(rev))
+
+        csetrelevantfilter = lambda n: n in relevantcsetnodes
+
+    else:
+        csetrelevantfilter = lambda n: True
+
     # If obtaining the raw store files, we need to scan the full repo to
     # derive all the changesets, manifests, and linkrevs.
     if usingrawchangelogandmanifest:
@@ -100,14 +116,19 @@
 
         for rev in repo:
             ctx = repo[rev]
+            node = ctx.node()
+
+            if not csetrelevantfilter(node):
+                continue
+
             mnode = ctx.manifestnode()
 
-            csetsforfiles.append(ctx.node())
+            csetsforfiles.append(node)
             mnodesforfiles.append(mnode)
             manifestlinkrevs[mnode] = rev
 
     else:
-        csetsforfiles = csetres['added']
+        csetsforfiles = [n for n in csetres['added'] if csetrelevantfilter(n)]
         mnodesforfiles = manres['added']
         manifestlinkrevs = manres['linkrevs']
 
@@ -115,7 +136,7 @@
     # revisions.
     fnodes = _derivefilesfrommanifests(repo, narrowmatcher, mnodesforfiles)
     _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csetsforfiles,
-                         manifestlinkrevs)
+                         manifestlinkrevs, shallow=bool(pullop.depth))
 
 def _checkuserawstorefiledata(pullop):
     """Check whether we should use rawstorefiledata command to retrieve data."""
@@ -564,7 +585,7 @@
                     weakref.proxy(tr))
 
 def _fetchfilesfromcsets(repo, tr, remote, pathfilter, fnodes, csets,
-                         manlinkrevs):
+                         manlinkrevs, shallow=False):
     """Fetch file data from explicit changeset revisions."""
 
     def iterrevisions(objs, remaining, progress):
@@ -588,11 +609,16 @@
             else:
                 continue
 
+            if b'linknode' in filerevision:
+                linknode = filerevision[b'linknode']
+            else:
+                linknode = node
+
             yield (
                 node,
                 filerevision[b'parents'][0],
                 filerevision[b'parents'][1],
-                node,
+                linknode,
                 basenode,
                 delta,
                 # Flags not yet supported.
@@ -609,6 +635,21 @@
     commandmeta = remote.apidescriptor[b'commands'][b'filesdata']
     batchsize = commandmeta.get(b'recommendedbatchsize', 50000)
 
+    shallowfiles = repository.REPO_FEATURE_SHALLOW_FILE_STORAGE in repo.features
+    fields = {b'parents', b'revision'}
+    clrev = repo.changelog.rev
+
+    # There are no guarantees that we'll have ancestor revisions if
+    # a) this repo has shallow file storage b) shallow data fetching is enabled.
+    # Force remote to not delta against possibly unknown revisions when these
+    # conditions hold.
+    haveparents = not (shallowfiles or shallow)
+
+    # Similarly, we may not have calculated linkrevs for all incoming file
+    # revisions. Ask the remote to do work for us in this case.
+    if not haveparents:
+        fields.add(b'linknode')
+
     for i in pycompat.xrange(0, len(csets), batchsize):
         batch = [x for x in csets[i:i + batchsize]]
         if not batch:
@@ -620,8 +661,8 @@
                     b'type': b'changesetexplicit',
                     b'nodes': batch,
                 }],
-                b'fields': {b'parents', b'revision'},
-                b'haveparents': True,
+                b'fields': fields,
+                b'haveparents': haveparents,
             }
 
             if pathfilter:
@@ -643,7 +684,14 @@
                     fnode: manlinkrevs[mnode]
                     for fnode, mnode in fnodes[path].iteritems()}
 
+                def getlinkrev(node):
+                    if node in linkrevs:
+                        return linkrevs[node]
+                    else:
+                        return clrev(node)
+
                 store.addgroup(iterrevisions(objs, header[b'totalitems'],
                                              progress),
-                               linkrevs.__getitem__,
-                               weakref.proxy(tr))
+                               getlinkrev,
+                               weakref.proxy(tr),
+                               maybemissingparents=shallow)