Mercurial > hg
comparison mercurial/changegroup.py @ 38893:23d582caae30
changegroup: capture revision delta in a data structure
The current changegroup generation code is tightly coupled to
the revlog API. This tight coupling makes it difficult to implement
alternate storage backends without requiring a large surface area
of the revlog API to be exposed. This is not desirable.
In order to support changegroup generation with non-revlog storage,
we'll need to abstract the concept of delta generation.
This commit is the first step down that road. We introduce a
data structure for representing a delta in a changegroup.
The API still leaves a lot to be desired. But at least we now
have separation between data and actions performed on it.
As part of this, we tweak behavior slightly: we no longer
concatenate the delta prefix with the metadata header. Instead,
we track and emit the prefix as a separate chunk. This shouldn't
have any meaningful impact since all the chunks just get sent to
the wire, the compressor, etc.
Because we're introducing a new object, this does add some
overhead to changegroup execution. `hg perfchangegroupchangelog`
on my clone of the Mercurial repo (~40,000 visible revisions in
the changelog) slows down a bit (before, then after):
! wall 1.268600 comb 1.270000 user 1.270000 sys 0.000000 (best of 8)
! wall 1.419479 comb 1.410000 user 1.410000 sys 0.000000 (best of 8)
And for `hg bundle -t none-v2 -a /dev/null`:
before: real 6.610 secs (user 6.460+0.000 sys 0.140+0.000)
after: real 7.210 secs (user 7.060+0.000 sys 0.140+0.000)
I plan to claw back this regression in future commits. And I may
even do away with this data structure once the refactor is complete.
For now, it makes things easier to comprehend.
Differential Revision: https://phab.mercurial-scm.org/D4075
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Fri, 03 Aug 2018 10:05:26 -0700 |
parents | eb022ce9e505 |
children | 19344024a8e1 |
comparison
equal
deleted
inserted
replaced
38892:eb022ce9e505 | 38893:23d582caae30 |
---|---|
15 from .node import ( | 15 from .node import ( |
16 hex, | 16 hex, |
17 nullid, | 17 nullid, |
18 nullrev, | 18 nullrev, |
19 short, | 19 short, |
20 ) | |
21 | |
22 from .thirdparty import ( | |
23 attr, | |
20 ) | 24 ) |
21 | 25 |
22 from . import ( | 26 from . import ( |
23 dagutil, | 27 dagutil, |
24 error, | 28 error, |
492 if len(d) < n: | 496 if len(d) < n: |
493 d += readexactly(self._fh, n - len(d)) | 497 d += readexactly(self._fh, n - len(d)) |
494 return d | 498 return d |
495 return readexactly(self._fh, n) | 499 return readexactly(self._fh, n) |
496 | 500 |
501 @attr.s(slots=True, frozen=True) | |
502 class revisiondelta(object): | |
503 """Describes a delta entry in a changegroup. | |
504 | |
505 Captured data is sufficient to serialize the delta into multiple | |
506 formats. | |
507 """ | |
508 # 20 byte node of this revision. | |
509 node = attr.ib() | |
510 # 20 byte nodes of parent revisions. | |
511 p1node = attr.ib() | |
512 p2node = attr.ib() | |
513 # 20 byte node of node this delta is against. | |
514 basenode = attr.ib() | |
515 # 20 byte node of changeset revision this delta is associated with. | |
516 linknode = attr.ib() | |
517 # 2 bytes of flags to apply to revision data. | |
518 flags = attr.ib() | |
519 # Iterable of chunks holding raw delta data. | |
520 deltachunks = attr.ib() | |
497 | 521 |
498 class cg1packer(object): | 522 class cg1packer(object): |
499 deltaheader = _CHANGEGROUPV1_DELTA_HEADER | 523 deltaheader = _CHANGEGROUPV1_DELTA_HEADER |
500 version = '01' | 524 version = '01' |
501 def __init__(self, repo, filematcher, bundlecaps=None): | 525 def __init__(self, repo, filematcher, bundlecaps=None): |
897 raise error.ProgrammingError('cg1 should not be used in this case') | 921 raise error.ProgrammingError('cg1 should not be used in this case') |
898 return prev | 922 return prev |
899 | 923 |
900 def revchunk(self, store, rev, prev, linknode): | 924 def revchunk(self, store, rev, prev, linknode): |
901 if util.safehasattr(self, 'full_nodes'): | 925 if util.safehasattr(self, 'full_nodes'): |
902 fn = self._revchunknarrow | 926 fn = self._revisiondeltanarrow |
903 else: | 927 else: |
904 fn = self._revchunknormal | 928 fn = self._revisiondeltanormal |
905 | 929 |
906 return fn(store, rev, prev, linknode) | 930 delta = fn(store, rev, prev, linknode) |
907 | 931 if not delta: |
908 def _revchunknormal(self, store, rev, prev, linknode): | 932 return |
933 | |
934 meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node, | |
935 delta.basenode, delta.linknode, | |
936 delta.flags) | |
937 l = len(meta) + sum(len(x) for x in delta.deltachunks) | |
938 | |
939 yield chunkheader(l) | |
940 yield meta | |
941 for x in delta.deltachunks: | |
942 yield x | |
943 | |
944 def _revisiondeltanormal(self, store, rev, prev, linknode): | |
909 node = store.node(rev) | 945 node = store.node(rev) |
910 p1, p2 = store.parentrevs(rev) | 946 p1, p2 = store.parentrevs(rev) |
911 base = self.deltaparent(store, rev, p1, p2, prev) | 947 base = self.deltaparent(store, rev, p1, p2, prev) |
912 | 948 |
913 prefix = '' | 949 prefix = '' |
925 delta = store.revision(node, raw=True) | 961 delta = store.revision(node, raw=True) |
926 prefix = mdiff.trivialdiffheader(len(delta)) | 962 prefix = mdiff.trivialdiffheader(len(delta)) |
927 else: | 963 else: |
928 delta = store.revdiff(base, rev) | 964 delta = store.revdiff(base, rev) |
929 p1n, p2n = store.parents(node) | 965 p1n, p2n = store.parents(node) |
930 basenode = store.node(base) | 966 |
931 flags = store.flags(rev) | 967 return revisiondelta( |
932 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags) | 968 node=node, |
933 meta += prefix | 969 p1node=p1n, |
934 l = len(meta) + len(delta) | 970 p2node=p2n, |
935 yield chunkheader(l) | 971 basenode=store.node(base), |
936 yield meta | 972 linknode=linknode, |
937 yield delta | 973 flags=store.flags(rev), |
938 | 974 deltachunks=(prefix, delta), |
939 def _revchunknarrow(self, store, rev, prev, linknode): | 975 ) |
976 | |
977 def _revisiondeltanarrow(self, store, rev, prev, linknode): | |
940 # build up some mapping information that's useful later. See | 978 # build up some mapping information that's useful later. See |
941 # the local() nested function below. | 979 # the local() nested function below. |
942 if not self.changelog_done: | 980 if not self.changelog_done: |
943 self.clnode_to_rev[linknode] = rev | 981 self.clnode_to_rev[linknode] = rev |
944 linkrev = rev | 982 linkrev = rev |
948 self.clrev_to_localrev[linkrev] = rev | 986 self.clrev_to_localrev[linkrev] = rev |
949 | 987 |
950 # This is a node to send in full, because the changeset it | 988 # This is a node to send in full, because the changeset it |
951 # corresponds to was a full changeset. | 989 # corresponds to was a full changeset. |
952 if linknode in self.full_nodes: | 990 if linknode in self.full_nodes: |
953 for x in self._revchunknormal(store, rev, prev, linknode): | 991 return self._revisiondeltanormal(store, rev, prev, linknode) |
954 yield x | |
955 return | |
956 | 992 |
957 # At this point, a node can either be one we should skip or an | 993 # At this point, a node can either be one we should skip or an |
958 # ellipsis. If it's not an ellipsis, bail immediately. | 994 # ellipsis. If it's not an ellipsis, bail immediately. |
959 if linkrev not in self.precomputed_ellipsis: | 995 if linkrev not in self.precomputed_ellipsis: |
960 return | 996 return |
1041 | 1077 |
1042 n = store.node(rev) | 1078 n = store.node(rev) |
1043 p1n, p2n = store.node(p1), store.node(p2) | 1079 p1n, p2n = store.node(p1), store.node(p2) |
1044 flags = store.flags(rev) | 1080 flags = store.flags(rev) |
1045 flags |= revlog.REVIDX_ELLIPSIS | 1081 flags |= revlog.REVIDX_ELLIPSIS |
1046 meta = self.builddeltaheader( | 1082 |
1047 n, p1n, p2n, nullid, linknode, flags) | |
1048 # TODO: try and actually send deltas for ellipsis data blocks | 1083 # TODO: try and actually send deltas for ellipsis data blocks |
1049 data = store.revision(n) | 1084 data = store.revision(n) |
1050 diffheader = mdiff.trivialdiffheader(len(data)) | 1085 diffheader = mdiff.trivialdiffheader(len(data)) |
1051 l = len(meta) + len(diffheader) + len(data) | 1086 |
1052 yield ''.join((chunkheader(l), | 1087 return revisiondelta( |
1053 meta, | 1088 node=n, |
1054 diffheader, | 1089 p1node=p1n, |
1055 data)) | 1090 p2node=p2n, |
1091 basenode=nullid, | |
1092 linknode=linknode, | |
1093 flags=flags, | |
1094 deltachunks=(diffheader, data), | |
1095 ) | |
1056 | 1096 |
1057 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): | 1097 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): |
1058 # do nothing with basenode, it is implicitly the previous one in HG10 | 1098 # do nothing with basenode, it is implicitly the previous one in HG10 |
1059 # do nothing with flags, it is implicitly 0 for cg1 and cg2 | 1099 # do nothing with flags, it is implicitly 0 for cg1 and cg2 |
1060 return struct.pack(self.deltaheader, node, p1n, p2n, linknode) | 1100 return struct.pack(self.deltaheader, node, p1n, p2n, linknode) |