Mercurial > hg
changeset 27237:c08814b48ae5
changegroup: avoid iterating the whole manifest
The old code gathered the list of all files that changed anywhere in
history and then collected the changed file nodes by walking the
entirety of each manifest to be sent. That's going to be unfortunate
for narrowhg, and it's already inefficient for medium-to-large
repositories. The new code instead records, for each manifest node, the
set of files changed by the changesets that reference it, and looks up
only those files in that manifest.
Timings for bundle --all on my hg repo, tested with hgperf:
Before:
! wall 23.442445 comb 23.440000 user 23.250000 sys 0.190000 (best of 3)
After:
! wall 20.272187 comb 20.270000 user 20.190000 sys 0.080000 (best of 3)
author | Augie Fackler <augie@google.com> |
---|---|
date | Fri, 04 Dec 2015 10:34:58 -0500 |
parents | b0d90fef16b6 |
children | c3dc03109401 |
files | mercurial/changegroup.py |
diffstat | 1 files changed, 16 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/mercurial/changegroup.py Fri Dec 04 15:59:46 2015 -0500
+++ b/mercurial/changegroup.py Fri Dec 04 10:34:58 2015 -0500
@@ -613,7 +613,8 @@
         clrevorder = {}
         mfs = {} # needed manifests
         fnodes = {} # needed file nodes
-        changedfiles = set()
+        # maps manifest node id -> set(changed files)
+        mfchangedfiles = {}
 
         # Callback for the changelog, used to collect changed files and manifest
         # nodes.
@@ -621,9 +622,12 @@
         def lookupcl(x):
             c = cl.read(x)
             clrevorder[x] = len(clrevorder)
-            changedfiles.update(c[3])
+            n = c[0]
             # record the first changeset introducing this manifest version
-            mfs.setdefault(c[0], x)
+            mfs.setdefault(n, x)
+            # Record a complete list of potentially-changed files in
+            # this manifest.
+            mfchangedfiles.setdefault(n, set()).update(c[3])
             return x
 
         self._verbosenote(_('uncompressed size of bundle content:\n'))
@@ -668,8 +672,12 @@
             clnode = mfs[x]
             if not fastpathlinkrev:
                 mdata = ml.readfast(x)
-                for f, n in mdata.iteritems():
-                    if f in changedfiles:
+                for f in mfchangedfiles[x]:
+                    if True:
+                        try:
+                            n = mdata[f]
+                        except KeyError:
+                            continue
                         # record the first changeset introducing this filelog
                         # version
                         fclnodes = fnodes.setdefault(f, {})
@@ -696,6 +704,9 @@
                 return dict(genfilenodes())
             return fnodes.get(fname, {})
 
+        changedfiles = set()
+        for x in mfchangedfiles.itervalues():
+            changedfiles.update(x)
         for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
                                         source):
             yield chunk
```