# HG changeset patch # User Gregory Szorc # Date 1533344396 25200 # Node ID f7228c907ef435e02970b281d645b4ec6cc8fbc2 # Parent 87b737b78bd05399c572aaefc6ee62884b28d52c changegroup: factor changelog chunk generation into own function We have separate functions for generating manifests and filelogs. Let's split changelog into its own function so things are consistent. As part of this, we refactor the code slightly. Before, the changelog linknode callback was updating state on variables inherited via a closure. Since the closure is now separate from generate(), we need to a way pass state between generate() and _generatechangelog(). The return value of _generatechangelog() is a 2-tuple where the first item is a dict containing accumulated state. We then alias some of its members into the scope of generate() to reduce code churn. I will be converting other functions to a similar pattern in future commits. Differential Revision: https://phab.mercurial-scm.org/D4133 diff -r 87b737b78bd0 -r f7228c907ef4 mercurial/changegroup.py --- a/mercurial/changegroup.py Fri Aug 03 14:16:14 2018 -0700 +++ b/mercurial/changegroup.py Fri Aug 03 17:59:56 2018 -0700 @@ -715,14 +715,100 @@ yield chunk def generate(self, commonrevs, clnodes, fastpathlinkrev, source): - '''yield a sequence of changegroup chunks (strings)''' + """Yield a sequence of changegroup byte chunks.""" + repo = self._repo cl = repo.changelog + self._verbosenote(_('uncompressed size of bundle content:\n')) + size = 0 + + clstate, chunks = self._generatechangelog(cl, clnodes) + for chunk in chunks: + size += len(chunk) + yield chunk + + self._verbosenote(_('%8.i (changelog)\n') % size) + + clrevorder = clstate['clrevorder'] + mfs = clstate['mfs'] + changedfiles = clstate['changedfiles'] + + # We need to make sure that the linkrev in the changegroup refers to + # the first changeset that introduced the manifest or file revision. + # The fastpath is usually safer than the slowpath, because the filelogs + # are walked in revlog order. + # + # When taking the slowpath with reorder=None and the manifest revlog + # uses generaldelta, the manifest may be walked in the "wrong" order. + # Without 'clrevorder', we would get an incorrect linkrev (see fix in + # cc0ff93d0c0c). + # + # When taking the fastpath, we are only vulnerable to reordering + # of the changelog itself. The changelog never uses generaldelta, so + # it is only reordered when reorder=True. To handle this case, we + # simply take the slowpath, which already has the 'clrevorder' logic. + # This was also fixed in cc0ff93d0c0c. + fastpathlinkrev = fastpathlinkrev and not self._reorder + # Treemanifests don't work correctly with fastpathlinkrev + # either, because we don't discover which directory nodes to + # send along with files. This could probably be fixed. + fastpathlinkrev = fastpathlinkrev and ( + 'treemanifest' not in repo.requirements) + + fnodes = {} # needed file nodes + + for chunk in self.generatemanifests(commonrevs, clrevorder, + fastpathlinkrev, mfs, fnodes, source): + yield chunk + + if self._ellipses: + mfdicts = None + if self._isshallow: + mfdicts = [(self._repo.manifestlog[n].read(), lr) + for (n, lr) in mfs.iteritems()] + + mfs.clear() + clrevs = set(cl.rev(x) for x in clnodes) + + if not fastpathlinkrev: + def linknodes(unused, fname): + return fnodes.get(fname, {}) + else: + cln = cl.node + def linknodes(filerevlog, fname): + llr = filerevlog.linkrev + fln = filerevlog.node + revs = ((r, llr(r)) for r in filerevlog) + return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs) + + if self._ellipses: + # We need to pass the mfdicts variable down into + # generatefiles(), but more than one command might have + # wrapped generatefiles so we can't modify the function + # signature. Instead, we pass the data to ourselves using an + # instance attribute. I'm sorry. + self._mfdicts = mfdicts + + for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, + source): + yield chunk + + yield self._close() + + if clnodes: + repo.hook('outgoing', node=hex(clnodes[0]), source=source) + + def _generatechangelog(self, cl, nodes): + """Generate data for changelog chunks. + + Returns a 2-tuple of a dict containing state and an iterable of + byte chunks. The state will not be fully populated until the + chunk stream has been fully consumed. + """ clrevorder = {} mfs = {} # needed manifests - fnodes = {} # needed file nodes - mfl = repo.manifestlog + mfl = self._repo.manifestlog # TODO violates storage abstraction. mfrevlog = mfl._revlog changedfiles = set() @@ -768,75 +854,15 @@ return x - self._verbosenote(_('uncompressed size of bundle content:\n')) - size = 0 - for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')): - size += len(chunk) - yield chunk - self._verbosenote(_('%8.i (changelog)\n') % size) - - # We need to make sure that the linkrev in the changegroup refers to - # the first changeset that introduced the manifest or file revision. - # The fastpath is usually safer than the slowpath, because the filelogs - # are walked in revlog order. - # - # When taking the slowpath with reorder=None and the manifest revlog - # uses generaldelta, the manifest may be walked in the "wrong" order. - # Without 'clrevorder', we would get an incorrect linkrev (see fix in - # cc0ff93d0c0c). - # - # When taking the fastpath, we are only vulnerable to reordering - # of the changelog itself. The changelog never uses generaldelta, so - # it is only reordered when reorder=True. To handle this case, we - # simply take the slowpath, which already has the 'clrevorder' logic. - # This was also fixed in cc0ff93d0c0c. - fastpathlinkrev = fastpathlinkrev and not self._reorder - # Treemanifests don't work correctly with fastpathlinkrev - # either, because we don't discover which directory nodes to - # send along with files. This could probably be fixed. - fastpathlinkrev = fastpathlinkrev and ( - 'treemanifest' not in repo.requirements) - - for chunk in self.generatemanifests(commonrevs, clrevorder, - fastpathlinkrev, mfs, fnodes, source): - yield chunk + state = { + 'clrevorder': clrevorder, + 'mfs': mfs, + 'changedfiles': changedfiles, + } - if self._ellipses: - mfdicts = None - if self._isshallow: - mfdicts = [(self._repo.manifestlog[n].read(), lr) - for (n, lr) in mfs.iteritems()] - - mfs.clear() - clrevs = set(cl.rev(x) for x in clnodes) + gen = self.group(nodes, cl, lookupcl, units=_('changesets')) - if not fastpathlinkrev: - def linknodes(unused, fname): - return fnodes.get(fname, {}) - else: - cln = cl.node - def linknodes(filerevlog, fname): - llr = filerevlog.linkrev - fln = filerevlog.node - revs = ((r, llr(r)) for r in filerevlog) - return dict((fln(r), cln(lr)) for r, lr in revs if lr in clrevs) - - if self._ellipses: - # We need to pass the mfdicts variable down into - # generatefiles(), but more than one command might have - # wrapped generatefiles so we can't modify the function - # signature. Instead, we pass the data to ourselves using an - # instance attribute. I'm sorry. - self._mfdicts = mfdicts - - for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, - source): - yield chunk - - yield self._close() - - if clnodes: - repo.hook('outgoing', node=hex(clnodes[0]), source=source) + return state, gen def generatemanifests(self, commonrevs, clrevorder, fastpathlinkrev, mfs, fnodes, source):