changegroup: don't try to build changelog chunks if not required
author Pulkit Goyal <pulkit@yandex-team.ru>
Tue, 29 Jan 2019 16:03:52 +0300
changeset 41448 fa7d61f9c512
parent 41445 73a33fe625bb
child 41449 bc776c31c093
changegroup: don't try to build changelog chunks if not required When we extend a narrow clone without ellipsis, we don't download changelog information because it is already present on the client. However, we still try to build that chunk stream. Building that chunk stream involves calling a lookup function and the store.emitrevisions() API. The lookup function is called len(cl) times. On large repositories, where len(cl) is in the millions, calling that lookup function is not a good idea. It is also unnecessary to use the store.emitrevisions() API because we already have the nodes, which we can use directly. This patch short-circuits the state-building logic if we are processing a non-ellipsis case and the changelog is not required. This saves ~20 seconds on our internal repo for a single extend call. Differential Revision: https://phab.mercurial-scm.org/D5733
mercurial/changegroup.py
--- a/mercurial/changegroup.py	Tue Jan 29 15:43:02 2019 +0300
+++ b/mercurial/changegroup.py	Tue Jan 29 16:03:52 2019 +0300
@@ -817,13 +817,13 @@
         self._verbosenote(_('uncompressed size of bundle content:\n'))
         size = 0
 
-        clstate, deltas = self._generatechangelog(cl, clnodes)
+        clstate, deltas = self._generatechangelog(cl, clnodes,
+                                                  generate=changelog)
         for delta in deltas:
-            if changelog:
-                for chunk in _revisiondeltatochunks(delta,
-                                                    self._builddeltaheader):
-                    size += len(chunk)
-                    yield chunk
+            for chunk in _revisiondeltatochunks(delta,
+                                                self._builddeltaheader):
+                size += len(chunk)
+                yield chunk
 
         close = closechunk()
         size += len(close)
@@ -917,12 +917,15 @@
         if clnodes:
             repo.hook('outgoing', node=hex(clnodes[0]), source=source)
 
-    def _generatechangelog(self, cl, nodes):
+    def _generatechangelog(self, cl, nodes, generate=True):
         """Generate data for changelog chunks.
 
         Returns a 2-tuple of a dict containing state and an iterable of
         byte chunks. The state will not be fully populated until the
         chunk stream has been fully consumed.
+
+        if generate is False, the state will be fully populated and no chunk
+        stream will be yielded
         """
         clrevorder = {}
         manifests = {}
@@ -937,6 +940,20 @@
             'clrevtomanifestrev': clrevtomanifestrev,
         }
 
+        if not (generate or self._ellipses):
+            # sort the nodes in storage order
+            nodes = sorted(nodes, key=cl.rev)
+            for node in nodes:
+                c = cl.changelogrevision(node)
+                clrevorder[node] = len(clrevorder)
+                # record the first changeset introducing this manifest version
+                manifests.setdefault(c.manifest, node)
+                # Record a complete list of potentially-changed files in
+                # this manifest.
+                changedfiles.update(c.files)
+
+            return state, ()
+
         # Callback for the changelog, used to collect changed files and
         # manifest nodes.
         # Returns the linkrev node (identity in the changelog case).