changegroup: port to emitrevisions() (issue5976)
authorGregory Szorc <gregory.szorc@gmail.com>
Fri, 21 Sep 2018 18:47:04 -0700
changeset 39865 31b7e8e7132e
parent 39864 7b752bf08435
child 39866 e23c03dc5cf9
changegroup: port to emitrevisions() (issue5976) We now have a unified API for emitting revision data from a storage backend. It handles sorting nodes and the complicated delta versus revision decisions for us. This commit ports changegroup to that API. There should be no behavior changes for changegroups not using ellipsis. And lack of test changes seems to confirm that. There are some changes for ellipsis mode, however. Before, when sending an ellipsis revision, we would always send a fulltext revision (as opposed to a delta). There was a TODO tracking this open item. One of the things the emitrevisions() API does for us is figure out whether we can safely emit a delta. So, it is now possible for ellipsis revisions to be sent as deltas! (It does this by not assuming parent/ancestor revisions are available and tracking which revisions have been sent out.) Because we eliminated the list of revision delta request objects, performance has improved substantially: $ hg perfchangegroupchangelog before: ! wall 24.348077 comb 24.330000 user 24.140000 sys 0.190000 (best of 3) after: ! wall 18.245911 comb 18.240000 user 18.100000 sys 0.140000 (best of 3) That's a lot of overhead for creating a few hundred thousand Python objects! This is still a little slower than 4.7. Probably due to 23d582ca introducing a type for the revision/delta results. There is potentially room to optimize. But at some point we need to abstract storage in order to support alternate storage backends. Unfortunately that means using a Python data structure to represent results. And unfortunately there is overhead with every new Python object created. Differential Revision: https://phab.mercurial-scm.org/D4725
mercurial/changegroup.py
--- a/mercurial/changegroup.py	Mon Sep 24 09:48:02 2018 -0700
+++ b/mercurial/changegroup.py	Fri Sep 21 18:47:04 2018 -0700
@@ -24,13 +24,13 @@
 )
 
 from . import (
-    dagop,
     error,
     match as matchmod,
     mdiff,
     phases,
     pycompat,
     repository,
+    revlog,
     util,
 )
 
@@ -536,18 +536,8 @@
         yield prefix
     yield data
 
-def _sortnodesnormal(store, nodes):
-    """Sort nodes for changegroup generation and turn into revnums."""
-    # for generaldelta revlogs, we linearize the revs; this will both be
-    # much quicker and generate a much smaller bundle
-    if store._generaldelta:
-        revs = set(store.rev(n) for n in nodes)
-        return dagop.linearize(revs, store.parentrevs)
-    else:
-        return sorted([store.rev(n) for n in nodes])
-
 def _sortnodesellipsis(store, nodes, cl, lookup):
-    """Sort nodes for changegroup generation and turn into revnums."""
+    """Sort nodes for changegroup generation."""
     # Ellipses serving mode.
     #
     # In a perfect world, we'd generate better ellipsis-ified graphs
@@ -565,11 +555,11 @@
     # changelog, so what we do is we sort the non-changelog histories
     # by the order in which they are used by the changelog.
     key = lambda n: cl.rev(lookup(n))
-    return [store.rev(n) for n in sorted(nodes, key=key)]
+    return sorted(nodes, key=key)
 
-def _makenarrowdeltarequest(cl, store, ischangelog, rev, node, linkrev,
-                            linknode, clrevtolocalrev, fullclnodes,
-                            precomputedellipsis):
+def _resolvenarrowrevisioninfo(cl, store, ischangelog, rev, linkrev,
+                               linknode, clrevtolocalrev, fullclnodes,
+                               precomputedellipsis):
     linkparents = precomputedellipsis[linkrev]
     def local(clrev):
         """Turn a changelog revnum into a local revnum.
@@ -644,15 +634,7 @@
 
     p1node, p2node = store.node(p1), store.node(p2)
 
-    # TODO: try and actually send deltas for ellipsis data blocks
-    return revisiondeltarequest(
-        node=node,
-        p1node=p1node,
-        p2node=p2node,
-        linknode=linknode,
-        basenode=nullid,
-        ellipsis=True,
-    )
+    return p1node, p2node, linknode
 
 def deltagroup(repo, store, nodes, ischangelog, lookup, forcedeltaparentprev,
                topic=None,
@@ -668,87 +650,97 @@
     if not nodes:
         return
 
-    # We perform two passes over the revisions whose data we will emit.
-    #
-    # In the first pass, we obtain information about the deltas that will
-    # be generated. This involves computing linknodes and adjusting the
-    # request to take shallow fetching into account. The end result of
-    # this pass is a list of "request" objects stating which deltas
-    # to obtain.
-    #
-    # The second pass is simply resolving the requested deltas.
-
     cl = repo.changelog
 
     if ischangelog:
-        # Changelog doesn't benefit from reordering revisions. So send
-        # out revisions in store order.
-        # TODO the API would be cleaner if this were controlled by the
-        # store producing the deltas.
-        revs = sorted(cl.rev(n) for n in nodes)
+        # `hg log` shows changesets in storage order. To preserve order
+        # across clones, send out changesets in storage order.
+        nodesorder = 'storage'
     elif ellipses:
-        revs = _sortnodesellipsis(store, nodes, cl, lookup)
+        nodes = _sortnodesellipsis(store, nodes, cl, lookup)
+        nodesorder = 'nodes'
     else:
-        revs = _sortnodesnormal(store, nodes)
+        nodesorder = None
 
-    # In the first pass, collect info about the deltas we'll be
-    # generating.
-    requests = []
-
-    # Add the parent of the first rev.
-    revs.insert(0, store.parentrevs(revs[0])[0])
+    # Perform ellipses filtering and revision massaging. We do this before
+    # emitrevisions() because a) filtering out revisions creates less work
+    # for emitrevisions() b) dropping revisions would break emitrevisions()'s
+    # assumptions about delta choices and we would possibly send a delta
+    # referencing a missing base revision.
+    #
+    # Also, calling lookup() has side-effects with regards to populating
+    # data structures. If we don't call lookup() for each node or if we call
+    # lookup() after the first pass through each node, things can break -
+    # possibly intermittently depending on the python hash seed! For that
+    # reason, we store a mapping of all linknodes during the initial node
+    # pass rather than use lookup() on the output side.
+    if ellipses:
+        filtered = []
+        adjustedparents = {}
+        linknodes = {}
 
-    for i in pycompat.xrange(len(revs) - 1):
-        prev = revs[i]
-        curr = revs[i + 1]
-
-        node = store.node(curr)
-        linknode = lookup(node)
-        p1node, p2node = store.parents(node)
-
-        if ellipses:
+        for node in nodes:
+            rev = store.rev(node)
+            linknode = lookup(node)
             linkrev = cl.rev(linknode)
-            clrevtolocalrev[linkrev] = curr
+            clrevtolocalrev[linkrev] = rev
 
-            # This is a node to send in full, because the changeset it
-            # corresponds to was a full changeset.
+            # If linknode is in fullclnodes, it means the corresponding
+            # changeset was a full changeset and is being sent unaltered.
             if linknode in fullclnodes:
-                requests.append(revisiondeltarequest(
-                    node=node,
-                    p1node=p1node,
-                    p2node=p2node,
-                    linknode=linknode,
-                    basenode=None,
-                ))
+                linknodes[node] = linknode
 
+            # If the corresponding changeset wasn't in the set computed
+            # as relevant to us, it should be dropped outright.
             elif linkrev not in precomputedellipsis:
-                pass
+                continue
+
             else:
-                requests.append(_makenarrowdeltarequest(
-                    cl, store, ischangelog, curr, node, linkrev, linknode,
-                    clrevtolocalrev, fullclnodes,
-                    precomputedellipsis))
-        else:
-            requests.append(revisiondeltarequest(
-                node=node,
-                p1node=p1node,
-                p2node=p2node,
-                linknode=linknode,
-                basenode=store.node(prev) if forcedeltaparentprev else None,
-            ))
+                # We could probably do this later and avoid the dict
+                # holding state. But it likely doesn't matter.
+                p1node, p2node, linknode = _resolvenarrowrevisioninfo(
+                    cl, store, ischangelog, rev, linkrev, linknode,
+                    clrevtolocalrev, fullclnodes, precomputedellipsis)
+
+                adjustedparents[node] = (p1node, p2node)
+                linknodes[node] = linknode
+
+            filtered.append(node)
+
+        nodes = filtered
 
     # We expect the first pass to be fast, so we only engage the progress
     # meter for constructing the revision deltas.
     progress = None
     if topic is not None:
         progress = repo.ui.makeprogress(topic, unit=_('chunks'),
-                                        total=len(requests))
+                                        total=len(nodes))
 
-    for i, delta in enumerate(store.emitrevisiondeltas(requests)):
+    revisions = store.emitrevisions(
+        nodes,
+        nodesorder=nodesorder,
+        revisiondata=True,
+        assumehaveparentrevisions=not ellipses,
+        deltaprevious=forcedeltaparentprev)
+
+    for i, revision in enumerate(revisions):
         if progress:
             progress.update(i + 1)
 
-        yield delta
+        if ellipses:
+            linknode = linknodes[revision.node]
+
+            if revision.node in adjustedparents:
+                p1node, p2node = adjustedparents[revision.node]
+                revision.p1node = p1node
+                revision.p2node = p2node
+                revision.flags |= revlog.REVIDX_ELLIPSIS
+
+        else:
+            linknode = lookup(revision.node)
+
+        revision.linknode = linknode
+        yield revision
 
     if progress:
         progress.complete()