Mercurial > hg-stable
changeset 39014:f7228c907ef4
changegroup: factor changelog chunk generation into own function
We have separate functions for generating manifests and filelogs.
Let's split changelog into its own function so things are consistent.
As part of this, we refactor the code slightly. Before, the
changelog linknode callback was updating state on variables
inherited via a closure. Since the closure is now separate from
generate(), we need to a way pass state between generate() and
_generatechangelog(). The return value of _generatechangelog()
is a 2-tuple where the first item is a dict containing accumulated
state. We then alias some of its members into the scope of
generate() to reduce code churn.
I will be converting other functions to a similar pattern in future
commits.
Differential Revision: https://phab.mercurial-scm.org/D4133
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Fri, 03 Aug 2018 17:59:56 -0700 |
parents | 87b737b78bd0 |
children | a1f694779b2f |
files | mercurial/changegroup.py |
diffstat | 1 files changed, 96 insertions(+), 70 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/changegroup.py Fri Aug 03 14:16:14 2018 -0700 +++ b/mercurial/changegroup.py Fri Aug 03 17:59:56 2018 -0700 @@ -715,14 +715,100 @@ yield chunk def generate(self, commonrevs, clnodes, fastpathlinkrev, source): - '''yield a sequence of changegroup chunks (strings)''' + """Yield a sequence of changegroup byte chunks.""" + repo = self._repo cl = repo.changelog + self._verbosenote(_('uncompressed size of bundle content:\n')) + size = 0 + + clstate, chunks = self._generatechangelog(cl, clnodes) + for chunk in chunks: + size += len(chunk) + yield chunk + + self._verbosenote(_('%8.i (changelog)\n') % size) + + clrevorder = clstate['clrevorder'] + mfs = clstate['mfs'] + changedfiles = clstate['changedfiles'] + + # We need to make sure that the linkrev in the changegroup refers to + # the first changeset that introduced the manifest or file revision. + # The fastpath is usually safer than the slowpath, because the filelogs + # are walked in revlog order. + # + # When taking the slowpath with reorder=None and the manifest revlog + # uses generaldelta, the manifest may be walked in the "wrong" order. + # Without 'clrevorder', we would get an incorrect linkrev (see fix in + # cc0ff93d0c0c). + # + # When taking the fastpath, we are only vulnerable to reordering + # of the changelog itself. The changelog never uses generaldelta, so + # it is only reordered when reorder=True. To handle this case, we + # simply take the slowpath, which already has the 'clrevorder' logic. + # This was also fixed in cc0ff93d0c0c. + fastpathlinkrev = fastpathlinkrev and not self._reorder + # Treemanifests don't work correctly with fastpathlinkrev + # either, because we don't discover which directory nodes to + # send along with files. This could probably be fixed. + fastpathlinkrev = fastpathlinkrev and ( + 'treemanifest' not in repo.requirements) + + fnodes = {} # needed file nodes + + for chunk in self.generatemanifests(commonrevs, clrevorder, + fastpathlinkrev, mfs, fnodes, source): + yield chunk + + if self._ellipses: + mfdicts = None + if self._isshallow: + mfdicts = [(self._repo.manifestlog[n].read(), lr) + for (n, lr) in mfs.iteritems()] + + mfs.clear() + clrevs = set(cl.rev(x) for x in clnodes) + + if not fastpathlinkrev: + def linknodes(unused, fname): + return fnodes.get(fname, {}) + else: + cln = cl.node + def linknodes(filerevlog, fname): + llr = filerevlog.linkrev + fln = filerevlog.node + revs = ((r, llr(r)) for r in filerevlog) + return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs) + + if self._ellipses: + # We need to pass the mfdicts variable down into + # generatefiles(), but more than one command might have + # wrapped generatefiles so we can't modify the function + # signature. Instead, we pass the data to ourselves using an + # instance attribute. I'm sorry. + self._mfdicts = mfdicts + + for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, + source): + yield chunk + + yield self._close() + + if clnodes: + repo.hook('outgoing', node=hex(clnodes[0]), source=source) + + def _generatechangelog(self, cl, nodes): + """Generate data for changelog chunks. + + Returns a 2-tuple of a dict containing state and an iterable of + byte chunks. The state will not be fully populated until the + chunk stream has been fully consumed. + """ clrevorder = {} mfs = {} # needed manifests - fnodes = {} # needed file nodes - mfl = repo.manifestlog + mfl = self._repo.manifestlog # TODO violates storage abstraction. mfrevlog = mfl._revlog changedfiles = set() @@ -768,75 +854,15 @@ return x - self._verbosenote(_('uncompressed size of bundle content:\n')) - size = 0 - for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')): - size += len(chunk) - yield chunk - self._verbosenote(_('%8.i (changelog)\n') % size) - - # We need to make sure that the linkrev in the changegroup refers to - # the first changeset that introduced the manifest or file revision. - # The fastpath is usually safer than the slowpath, because the filelogs - # are walked in revlog order. - # - # When taking the slowpath with reorder=None and the manifest revlog - # uses generaldelta, the manifest may be walked in the "wrong" order. - # Without 'clrevorder', we would get an incorrect linkrev (see fix in - # cc0ff93d0c0c). - # - # When taking the fastpath, we are only vulnerable to reordering - # of the changelog itself. The changelog never uses generaldelta, so - # it is only reordered when reorder=True. To handle this case, we - # simply take the slowpath, which already has the 'clrevorder' logic. - # This was also fixed in cc0ff93d0c0c. - fastpathlinkrev = fastpathlinkrev and not self._reorder - # Treemanifests don't work correctly with fastpathlinkrev - # either, because we don't discover which directory nodes to - # send along with files. This could probably be fixed. - fastpathlinkrev = fastpathlinkrev and ( - 'treemanifest' not in repo.requirements) - - for chunk in self.generatemanifests(commonrevs, clrevorder, - fastpathlinkrev, mfs, fnodes, source): - yield chunk + state = { + 'clrevorder': clrevorder, + 'mfs': mfs, + 'changedfiles': changedfiles, + } - if self._ellipses: - mfdicts = None - if self._isshallow: - mfdicts = [(self._repo.manifestlog[n].read(), lr) - for (n, lr) in mfs.iteritems()] - - mfs.clear() - clrevs = set(cl.rev(x) for x in clnodes) + gen = self.group(nodes, cl, lookupcl, units=_('changesets')) - if not fastpathlinkrev: - def linknodes(unused, fname): - return fnodes.get(fname, {}) - else: - cln = cl.node - def linknodes(filerevlog, fname): - llr = filerevlog.linkrev - fln = filerevlog.node - revs = ((r, llr(r)) for r in filerevlog) - return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs) - - if self._ellipses: - # We need to pass the mfdicts variable down into - # generatefiles(), but more than one command might have - # wrapped generatefiles so we can't modify the function - # signature. Instead, we pass the data to ourselves using an - # instance attribute. I'm sorry. - self._mfdicts = mfdicts - - for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, - source): - yield chunk - - yield self._close() - - if clnodes: - repo.hook('outgoing', node=hex(clnodes[0]), source=source) + return state, gen def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source):