# HG changeset patch # User Gregory Szorc # Date 1533315926 25200 # Node ID 23d582caae307662a604f72b1a19bf2f71c221db # Parent eb022ce9e505acafc725425e94fc413e47cfc0d8 changegroup: capture revision delta in a data structure The current changegroup generation code is tightly coupled to the revlog API. This tight coupling makes it difficult to implement alternate storage backends without requiring a large surface area of the revlog API to be exposed. This is not desirable. In order to support changegroup generation with non-revlog storage, we'll need to abstract the concept of delta generation. This commit is the first step down that road. We introduce a data structure for representing a delta in a changegroup. The API still leaves a lot to be desired. But at least we now have separation between data and actions performed on it. As part of this, we tweak behavior slightly: we no longer concatenate the delta prefix with the metadata header. Instead, we track and emit the prefix as a separate chunk. This shouldn't have any meaningful impact since all the chunks just get sent to the wire, the compressor, etc. Because we're introducing a new object, this does add some overhead to changegroup execution. `hg perfchangegroupchangelog` on my clone of the Mercurial repo (~40,000 visible revisions in the changelog) slows down a bit: ! wall 1.268600 comb 1.270000 user 1.270000 sys 0.000000 (best of 8) ! wall 1.419479 comb 1.410000 user 1.410000 sys 0.000000 (best of 8) With for `hg bundle -t none-v2 -a /dev/null`: before: real 6.610 secs (user 6.460+0.000 sys 0.140+0.000) after: real 7.210 secs (user 7.060+0.000 sys 0.140+0.000) I plan to claw back this regression in future commits. And I may even do away with this data structure once the refactor is complete. For now, it makes things easier to comprehend. Differential Revision: https://phab.mercurial-scm.org/D4075 diff -r eb022ce9e505 -r 23d582caae30 mercurial/changegroup.py --- a/mercurial/changegroup.py Thu Aug 02 16:36:40 2018 -0700 +++ b/mercurial/changegroup.py Fri Aug 03 10:05:26 2018 -0700 @@ -19,6 +19,10 @@ short, ) +from .thirdparty import ( + attr, +) + from . import ( dagutil, error, @@ -494,6 +498,26 @@ return d return readexactly(self._fh, n) +@attr.s(slots=True, frozen=True) +class revisiondelta(object): + """Describes a delta entry in a changegroup. + + Captured data is sufficient to serialize the delta into multiple + formats. + """ + # 20 byte node of this revision. + node = attr.ib() + # 20 byte nodes of parent revisions. + p1node = attr.ib() + p2node = attr.ib() + # 20 byte node of node this delta is against. + basenode = attr.ib() + # 20 byte node of changeset revision this delta is associated with. + linknode = attr.ib() + # 2 bytes of flags to apply to revision data. + flags = attr.ib() + # Iterable of chunks holding raw delta data. + deltachunks = attr.ib() class cg1packer(object): deltaheader = _CHANGEGROUPV1_DELTA_HEADER @@ -899,13 +923,25 @@ def revchunk(self, store, rev, prev, linknode): if util.safehasattr(self, 'full_nodes'): - fn = self._revchunknarrow + fn = self._revisiondeltanarrow else: - fn = self._revchunknormal + fn = self._revisiondeltanormal + + delta = fn(store, rev, prev, linknode) + if not delta: + return - return fn(store, rev, prev, linknode) + meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node, + delta.basenode, delta.linknode, + delta.flags) + l = len(meta) + sum(len(x) for x in delta.deltachunks) - def _revchunknormal(self, store, rev, prev, linknode): + yield chunkheader(l) + yield meta + for x in delta.deltachunks: + yield x + + def _revisiondeltanormal(self, store, rev, prev, linknode): node = store.node(rev) p1, p2 = store.parentrevs(rev) base = self.deltaparent(store, rev, p1, p2, prev) @@ -927,16 +963,18 @@ else: delta = store.revdiff(base, rev) p1n, p2n = store.parents(node) - basenode = store.node(base) - flags = store.flags(rev) - meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags) - meta += prefix - l = len(meta) + len(delta) - yield chunkheader(l) - yield meta - yield delta - def _revchunknarrow(self, store, rev, prev, linknode): + return revisiondelta( + node=node, + p1node=p1n, + p2node=p2n, + basenode=store.node(base), + linknode=linknode, + flags=store.flags(rev), + deltachunks=(prefix, delta), + ) + + def _revisiondeltanarrow(self, store, rev, prev, linknode): # build up some mapping information that's useful later. See # the local() nested function below. if not self.changelog_done: @@ -950,9 +988,7 @@ # This is a node to send in full, because the changeset it # corresponds to was a full changeset. if linknode in self.full_nodes: - for x in self._revchunknormal(store, rev, prev, linknode): - yield x - return + return self._revisiondeltanormal(store, rev, prev, linknode) # At this point, a node can either be one we should skip or an # ellipsis. If it's not an ellipsis, bail immediately. @@ -1043,16 +1079,20 @@ p1n, p2n = store.node(p1), store.node(p2) flags = store.flags(rev) flags |= revlog.REVIDX_ELLIPSIS - meta = self.builddeltaheader( - n, p1n, p2n, nullid, linknode, flags) + # TODO: try and actually send deltas for ellipsis data blocks data = store.revision(n) diffheader = mdiff.trivialdiffheader(len(data)) - l = len(meta) + len(diffheader) + len(data) - yield ''.join((chunkheader(l), - meta, - diffheader, - data)) + + return revisiondelta( + node=n, + p1node=p1n, + p2node=p2n, + basenode=nullid, + linknode=linknode, + flags=flags, + deltachunks=(diffheader, data), + ) def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): # do nothing with basenode, it is implicitly the previous one in HG10