comparison mercurial/changegroup.py @ 38893:23d582caae30

changegroup: capture revision delta in a data structure The current changegroup generation code is tightly coupled to the revlog API. This tight coupling makes it difficult to implement alternate storage backends without requiring a large surface area of the revlog API to be exposed. This is not desirable. In order to support changegroup generation with non-revlog storage, we'll need to abstract the concept of delta generation. This commit is the first step down that road. We introduce a data structure for representing a delta in a changegroup. The API still leaves a lot to be desired. But at least we now have separation between data and actions performed on it. As part of this, we tweak behavior slightly: we no longer concatenate the delta prefix with the metadata header. Instead, we track and emit the prefix as a separate chunk. This shouldn't have any meaningful impact since all the chunks just get sent to the wire, the compressor, etc. Because we're introducing a new object, this does add some overhead to changegroup execution. `hg perfchangegroupchangelog` on my clone of the Mercurial repo (~40,000 visible revisions in the changelog) slows down a bit: ! wall 1.268600 comb 1.270000 user 1.270000 sys 0.000000 (best of 8) ! wall 1.419479 comb 1.410000 user 1.410000 sys 0.000000 (best of 8) And for `hg bundle -t none-v2 -a /dev/null`: before: real 6.610 secs (user 6.460+0.000 sys 0.140+0.000) after: real 7.210 secs (user 7.060+0.000 sys 0.140+0.000) I plan to claw back this regression in future commits. And I may even do away with this data structure once the refactor is complete. For now, it makes things easier to comprehend. Differential Revision: https://phab.mercurial-scm.org/D4075
author Gregory Szorc <gregory.szorc@gmail.com>
date Fri, 03 Aug 2018 10:05:26 -0700
parents eb022ce9e505
children 19344024a8e1
comparison
equal deleted inserted replaced
38892:eb022ce9e505 38893:23d582caae30
15 from .node import ( 15 from .node import (
16 hex, 16 hex,
17 nullid, 17 nullid,
18 nullrev, 18 nullrev,
19 short, 19 short,
20 )
21
22 from .thirdparty import (
23 attr,
20 ) 24 )
21 25
22 from . import ( 26 from . import (
23 dagutil, 27 dagutil,
24 error, 28 error,
492 if len(d) < n: 496 if len(d) < n:
493 d += readexactly(self._fh, n - len(d)) 497 d += readexactly(self._fh, n - len(d))
494 return d 498 return d
495 return readexactly(self._fh, n) 499 return readexactly(self._fh, n)
496 500
501 @attr.s(slots=True, frozen=True)
502 class revisiondelta(object):
503 """Describes a delta entry in a changegroup.
504
505 Captured data is sufficient to serialize the delta into multiple
506 formats.
507 """
508 # 20 byte node of this revision.
509 node = attr.ib()
510 # 20 byte nodes of parent revisions.
511 p1node = attr.ib()
512 p2node = attr.ib()
513 # 20 byte node of node this delta is against.
514 basenode = attr.ib()
515 # 20 byte node of changeset revision this delta is associated with.
516 linknode = attr.ib()
517 # 2 bytes of flags to apply to revision data.
518 flags = attr.ib()
519 # Iterable of chunks holding raw delta data.
520 deltachunks = attr.ib()
497 521
498 class cg1packer(object): 522 class cg1packer(object):
499 deltaheader = _CHANGEGROUPV1_DELTA_HEADER 523 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
500 version = '01' 524 version = '01'
501 def __init__(self, repo, filematcher, bundlecaps=None): 525 def __init__(self, repo, filematcher, bundlecaps=None):
897 raise error.ProgrammingError('cg1 should not be used in this case') 921 raise error.ProgrammingError('cg1 should not be used in this case')
898 return prev 922 return prev
899 923
900 def revchunk(self, store, rev, prev, linknode): 924 def revchunk(self, store, rev, prev, linknode):
901 if util.safehasattr(self, 'full_nodes'): 925 if util.safehasattr(self, 'full_nodes'):
902 fn = self._revchunknarrow 926 fn = self._revisiondeltanarrow
903 else: 927 else:
904 fn = self._revchunknormal 928 fn = self._revisiondeltanormal
905 929
906 return fn(store, rev, prev, linknode) 930 delta = fn(store, rev, prev, linknode)
907 931 if not delta:
908 def _revchunknormal(self, store, rev, prev, linknode): 932 return
933
934 meta = self.builddeltaheader(delta.node, delta.p1node, delta.p2node,
935 delta.basenode, delta.linknode,
936 delta.flags)
937 l = len(meta) + sum(len(x) for x in delta.deltachunks)
938
939 yield chunkheader(l)
940 yield meta
941 for x in delta.deltachunks:
942 yield x
943
944 def _revisiondeltanormal(self, store, rev, prev, linknode):
909 node = store.node(rev) 945 node = store.node(rev)
910 p1, p2 = store.parentrevs(rev) 946 p1, p2 = store.parentrevs(rev)
911 base = self.deltaparent(store, rev, p1, p2, prev) 947 base = self.deltaparent(store, rev, p1, p2, prev)
912 948
913 prefix = '' 949 prefix = ''
925 delta = store.revision(node, raw=True) 961 delta = store.revision(node, raw=True)
926 prefix = mdiff.trivialdiffheader(len(delta)) 962 prefix = mdiff.trivialdiffheader(len(delta))
927 else: 963 else:
928 delta = store.revdiff(base, rev) 964 delta = store.revdiff(base, rev)
929 p1n, p2n = store.parents(node) 965 p1n, p2n = store.parents(node)
930 basenode = store.node(base) 966
931 flags = store.flags(rev) 967 return revisiondelta(
932 meta = self.builddeltaheader(node, p1n, p2n, basenode, linknode, flags) 968 node=node,
933 meta += prefix 969 p1node=p1n,
934 l = len(meta) + len(delta) 970 p2node=p2n,
935 yield chunkheader(l) 971 basenode=store.node(base),
936 yield meta 972 linknode=linknode,
937 yield delta 973 flags=store.flags(rev),
938 974 deltachunks=(prefix, delta),
939 def _revchunknarrow(self, store, rev, prev, linknode): 975 )
976
977 def _revisiondeltanarrow(self, store, rev, prev, linknode):
940 # build up some mapping information that's useful later. See 978 # build up some mapping information that's useful later. See
941 # the local() nested function below. 979 # the local() nested function below.
942 if not self.changelog_done: 980 if not self.changelog_done:
943 self.clnode_to_rev[linknode] = rev 981 self.clnode_to_rev[linknode] = rev
944 linkrev = rev 982 linkrev = rev
948 self.clrev_to_localrev[linkrev] = rev 986 self.clrev_to_localrev[linkrev] = rev
949 987
950 # This is a node to send in full, because the changeset it 988 # This is a node to send in full, because the changeset it
951 # corresponds to was a full changeset. 989 # corresponds to was a full changeset.
952 if linknode in self.full_nodes: 990 if linknode in self.full_nodes:
953 for x in self._revchunknormal(store, rev, prev, linknode): 991 return self._revisiondeltanormal(store, rev, prev, linknode)
954 yield x
955 return
956 992
957 # At this point, a node can either be one we should skip or an 993 # At this point, a node can either be one we should skip or an
958 # ellipsis. If it's not an ellipsis, bail immediately. 994 # ellipsis. If it's not an ellipsis, bail immediately.
959 if linkrev not in self.precomputed_ellipsis: 995 if linkrev not in self.precomputed_ellipsis:
960 return 996 return
1041 1077
1042 n = store.node(rev) 1078 n = store.node(rev)
1043 p1n, p2n = store.node(p1), store.node(p2) 1079 p1n, p2n = store.node(p1), store.node(p2)
1044 flags = store.flags(rev) 1080 flags = store.flags(rev)
1045 flags |= revlog.REVIDX_ELLIPSIS 1081 flags |= revlog.REVIDX_ELLIPSIS
1046 meta = self.builddeltaheader( 1082
1047 n, p1n, p2n, nullid, linknode, flags)
1048 # TODO: try and actually send deltas for ellipsis data blocks 1083 # TODO: try and actually send deltas for ellipsis data blocks
1049 data = store.revision(n) 1084 data = store.revision(n)
1050 diffheader = mdiff.trivialdiffheader(len(data)) 1085 diffheader = mdiff.trivialdiffheader(len(data))
1051 l = len(meta) + len(diffheader) + len(data) 1086
1052 yield ''.join((chunkheader(l), 1087 return revisiondelta(
1053 meta, 1088 node=n,
1054 diffheader, 1089 p1node=p1n,
1055 data)) 1090 p2node=p2n,
1091 basenode=nullid,
1092 linknode=linknode,
1093 flags=flags,
1094 deltachunks=(diffheader, data),
1095 )
1056 1096
1057 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags): 1097 def builddeltaheader(self, node, p1n, p2n, basenode, linknode, flags):
1058 # do nothing with basenode, it is implicitly the previous one in HG10 1098 # do nothing with basenode, it is implicitly the previous one in HG10
1059 # do nothing with flags, it is implicitly 0 for cg1 and cg2 1099 # do nothing with flags, it is implicitly 0 for cg1 and cg2
1060 return struct.pack(self.deltaheader, node, p1n, p2n, linknode) 1100 return struct.pack(self.deltaheader, node, p1n, p2n, linknode)