changegroup: refactor delta parent code
authorGregory Szorc <gregory.szorc@gmail.com>
Wed, 08 Aug 2018 20:17:48 -0700
changeset 39055 ef3d3a2f9aa5
parent 39054 39b8277e2115
child 39056 e793e11e1462
changegroup: refactor delta parent code We had recently abstracted the delta parent functions to facilitate extracting code from cgpacker. Now that we're in a better place, it is time to revisit the design. Changegroup version 1 requires that the previous node be used as the delta parent. Later versions allow any available node to be used as the base. In the case where an arbitrary parent can be used, the choice of a delta parent is best left in the hands of the storage backend. So it makes sense for the delta parent selection to be hidden away in the storage layer. This means deferring the choice of the delta parent selection function to as close to delta generation time as possible. This commit moves the delta selection logic to essentially just before delta generation. However, because changegroup version 1 limits what we can do, we have retained the ability to force a delta against the previous revision. As part of this, I realized that the ellipsis parent function was unused! That's because ellipsis mode always sends full revisions and not deltas. Differential Revision: https://phab.mercurial-scm.org/D4214
mercurial/changegroup.py
--- a/mercurial/changegroup.py	Wed Aug 08 16:01:26 2018 -0700
+++ b/mercurial/changegroup.py	Wed Aug 08 20:17:48 2018 -0700
@@ -587,11 +587,37 @@
     key = lambda n: cl.rev(lookup(n))
     return [store.rev(n) for n in sorted(nodes, key=key)]
 
-def _revisiondeltanormal(store, rev, prev, linknode, deltaparentfn):
+def _revisiondeltanormal(store, rev, prev, linknode, forcedeltaparentprev):
     """Construct a revision delta for non-ellipses changegroup generation."""
     node = store.node(rev)
     p1, p2 = store.parentrevs(rev)
-    base = deltaparentfn(store, rev, p1, p2, prev)
+
+    if forcedeltaparentprev:
+        base = prev
+    else:
+        dp = store.deltaparent(rev)
+
+        if dp == nullrev and store.storedeltachains:
+            # Avoid sending full revisions when delta parent is null. Pick prev
+            # in that case. It's tempting to pick p1 in this case, as p1 will
+            # be smaller in the common case. However, computing a delta against
+            # p1 may require resolving the raw text of p1, which could be
+            # expensive. The revlog caches should have prev cached, meaning
+            # less CPU for changegroup generation. There is likely room to add
+            # a flag and/or config option to control this behavior.
+            base = prev
+        elif dp == nullrev:
+            # revlog is configured to use full snapshot for a reason,
+            # stick to full snapshot.
+            base = nullrev
+        elif dp not in (p1, p2, prev):
+            # Pick prev when we can't be sure remote has the base revision.
+            base = prev
+        else:
+            base = dp
+
+        if base != nullrev and not store.candelta(base, rev):
+            base = nullrev
 
     revision = None
     delta = None
@@ -719,7 +745,7 @@
         delta=None,
     )
 
-def deltagroup(repo, revs, store, ischangelog, lookup, deltaparentfn,
+def deltagroup(repo, revs, store, ischangelog, lookup, forcedeltaparentprev,
                units=None,
                ellipses=False, clrevtolocalrev=None, fullclnodes=None,
                precomputedellipsis=None):
@@ -761,7 +787,7 @@
             # corresponds to was a full changeset.
             if linknode in fullclnodes:
                 delta = _revisiondeltanormal(store, curr, prev, linknode,
-                                             deltaparentfn)
+                                             forcedeltaparentprev)
             elif linkrev not in precomputedellipsis:
                 delta = None
             else:
@@ -771,7 +797,7 @@
                     precomputedellipsis)
         else:
             delta = _revisiondeltanormal(store, curr, prev, linknode,
-                                         deltaparentfn)
+                                         forcedeltaparentprev)
 
         if delta:
             yield delta
@@ -781,7 +807,8 @@
 
 class cgpacker(object):
     def __init__(self, repo, filematcher, version, allowreorder,
-                 deltaparentfn, builddeltaheader, manifestsend,
+                 builddeltaheader, manifestsend,
+                 forcedeltaparentprev=False,
                  bundlecaps=None, ellipses=False,
                  shallow=False, ellipsisroots=None, fullnodes=None):
         """Given a source repo, construct a bundler.
@@ -793,8 +820,9 @@
         This value is used when ``bundle.reorder`` is ``auto`` or isn't
         set.
 
-        deltaparentfn is a callable that resolves the delta parent for
-        a specific revision.
+        forcedeltaparentprev indicates whether delta parents must be against
+        the previous revision in a delta group. This should only be used for
+        compatibility with changegroup version 1.
 
         builddeltaheader is a callable that constructs the header for a group
         delta.
@@ -820,7 +848,7 @@
         self._filematcher = filematcher
 
         self.version = version
-        self._deltaparentfn = deltaparentfn
+        self._forcedeltaparentprev = forcedeltaparentprev
         self._builddeltaheader = builddeltaheader
         self._manifestsend = manifestsend
         self._ellipses = ellipses
@@ -1025,7 +1053,7 @@
 
         gen = deltagroup(
             self._repo, revs, cl, True, lookupcl,
-            self._deltaparentfn,
+            self._forcedeltaparentprev,
             ellipses=self._ellipses,
             units=_('changesets'),
             clrevtolocalrev={},
@@ -1114,7 +1142,7 @@
 
             deltas = deltagroup(
                 self._repo, revs, store, False, lookupfn,
-                self._deltaparentfn,
+                self._forcedeltaparentprev,
                 ellipses=self._ellipses,
                 units=_('manifests'),
                 clrevtolocalrev=clrevtolocalrev,
@@ -1206,7 +1234,7 @@
 
                 deltas = deltagroup(
                     self._repo, revs, filerevlog, False, lookupfilelog,
-                    self._deltaparentfn,
+                    self._forcedeltaparentprev,
                     ellipses=self._ellipses,
                     clrevtolocalrev=clrevtolocalrev,
                     fullclnodes=self._fullclnodes,
@@ -1216,65 +1244,16 @@
 
         progress.complete()
 
-def _deltaparentprev(store, rev, p1, p2, prev):
-    """Resolve a delta parent to the previous revision.
-
-    Used for version 1 changegroups, which don't support generaldelta.
-    """
-    return prev
-
-def _deltaparentgeneraldelta(store, rev, p1, p2, prev):
-    """Resolve a delta parent when general deltas are supported."""
-    dp = store.deltaparent(rev)
-    if dp == nullrev and store.storedeltachains:
-        # Avoid sending full revisions when delta parent is null. Pick prev
-        # in that case. It's tempting to pick p1 in this case, as p1 will
-        # be smaller in the common case. However, computing a delta against
-        # p1 may require resolving the raw text of p1, which could be
-        # expensive. The revlog caches should have prev cached, meaning
-        # less CPU for changegroup generation. There is likely room to add
-        # a flag and/or config option to control this behavior.
-        base = prev
-    elif dp == nullrev:
-        # revlog is configured to use full snapshot for a reason,
-        # stick to full snapshot.
-        base = nullrev
-    elif dp not in (p1, p2, prev):
-        # Pick prev when we can't be sure remote has the base revision.
-        return prev
-    else:
-        base = dp
-
-    if base != nullrev and not store.candelta(base, rev):
-        base = nullrev
-
-    return base
-
-def _deltaparentellipses(store, rev, p1, p2, prev):
-    """Resolve a delta parent when in ellipses mode."""
-    # TODO: send better deltas when in narrow mode.
-    #
-    # changegroup.group() loops over revisions to send,
-    # including revisions we'll skip. What this means is that
-    # `prev` will be a potentially useless delta base for all
-    # ellipsis nodes, as the client likely won't have it. In
-    # the future we should do bookkeeping about which nodes
-    # have been sent to the client, and try to be
-    # significantly smarter about delta bases. This is
-    # slightly tricky because this same code has to work for
-    # all revlogs, and we don't have the linkrev/linknode here.
-    return p1
-
 def _makecg1packer(repo, filematcher, bundlecaps, ellipses=False,
                    shallow=False, ellipsisroots=None, fullnodes=None):
     builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
         d.node, d.p1node, d.p2node, d.linknode)
 
     return cgpacker(repo, filematcher, b'01',
-                    deltaparentfn=_deltaparentprev,
                     allowreorder=None,
                     builddeltaheader=builddeltaheader,
                     manifestsend=b'',
+                    forcedeltaparentprev=True,
                     bundlecaps=bundlecaps,
                     ellipses=ellipses,
                     shallow=shallow,
@@ -1290,7 +1269,6 @@
     # generally doesn't help, so we disable it by default (treating
     # bundle.reorder=auto just like bundle.reorder=False).
     return cgpacker(repo, filematcher, b'02',
-                    deltaparentfn=_deltaparentgeneraldelta,
                     allowreorder=False,
                     builddeltaheader=builddeltaheader,
                     manifestsend=b'',
@@ -1305,11 +1283,7 @@
     builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
         d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags)
 
-    deltaparentfn = (_deltaparentellipses if ellipses
-                     else _deltaparentgeneraldelta)
-
     return cgpacker(repo, filematcher, b'03',
-                    deltaparentfn=deltaparentfn,
                     allowreorder=False,
                     builddeltaheader=builddeltaheader,
                     manifestsend=closechunk(),