Mercurial > hg
comparison mercurial/changegroup.py @ 27237:c08814b48ae5
changegroup: avoid iterating the whole manifest
The old code gathered the list of all files that changed anywhere in
history and then collected the changed file nodes by walking the
entirety of each manifest to be sent. That's going to be unfortunate
for narrowhg, and it's already inefficient for medium-to-large
repositories. The new code instead records, for each manifest node,
the set of files changed by the changesets that reference it, and
looks up only those files in the manifest.
Timings for bundle --all on my hg repo, tested with hgperf:
Before:
! wall 23.442445 comb 23.440000 user 23.250000 sys 0.190000 (best of 3)
After:
! wall 20.272187 comb 20.270000 user 20.190000 sys 0.080000 (best of 3)
author | Augie Fackler <augie@google.com> |
---|---|
date | Fri, 04 Dec 2015 10:34:58 -0500 |
parents | beb60a898dd0 |
children | c3dc03109401 |
comparison
equal
deleted
inserted
replaced
27236:b0d90fef16b6 | 27237:c08814b48ae5 |
---|---|
611 ml = repo.manifest | 611 ml = repo.manifest |
612 | 612 |
613 clrevorder = {} | 613 clrevorder = {} |
614 mfs = {} # needed manifests | 614 mfs = {} # needed manifests |
615 fnodes = {} # needed file nodes | 615 fnodes = {} # needed file nodes |
616 changedfiles = set() | 616 # maps manifest node id -> set(changed files) |
617 mfchangedfiles = {} | |
617 | 618 |
618 # Callback for the changelog, used to collect changed files and manifest | 619 # Callback for the changelog, used to collect changed files and manifest |
619 # nodes. | 620 # nodes. |
620 # Returns the linkrev node (identity in the changelog case). | 621 # Returns the linkrev node (identity in the changelog case). |
621 def lookupcl(x): | 622 def lookupcl(x): |
622 c = cl.read(x) | 623 c = cl.read(x) |
623 clrevorder[x] = len(clrevorder) | 624 clrevorder[x] = len(clrevorder) |
624 changedfiles.update(c[3]) | 625 n = c[0] |
625 # record the first changeset introducing this manifest version | 626 # record the first changeset introducing this manifest version |
626 mfs.setdefault(c[0], x) | 627 mfs.setdefault(n, x) |
628 # Record a complete list of potentially-changed files in | |
629 # this manifest. | |
630 mfchangedfiles.setdefault(n, set()).update(c[3]) | |
627 return x | 631 return x |
628 | 632 |
629 self._verbosenote(_('uncompressed size of bundle content:\n')) | 633 self._verbosenote(_('uncompressed size of bundle content:\n')) |
630 size = 0 | 634 size = 0 |
631 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')): | 635 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')): |
666 after manifests have been sent to the client. | 670 after manifests have been sent to the client. |
667 """ | 671 """ |
668 clnode = mfs[x] | 672 clnode = mfs[x] |
669 if not fastpathlinkrev: | 673 if not fastpathlinkrev: |
670 mdata = ml.readfast(x) | 674 mdata = ml.readfast(x) |
671 for f, n in mdata.iteritems(): | 675 for f in mfchangedfiles[x]: |
672 if f in changedfiles: | 676 if True: |
677 try: | |
678 n = mdata[f] | |
679 except KeyError: | |
680 continue | |
673 # record the first changeset introducing this filelog | 681 # record the first changeset introducing this filelog |
674 # version | 682 # version |
675 fclnodes = fnodes.setdefault(f, {}) | 683 fclnodes = fnodes.setdefault(f, {}) |
676 fclnode = fclnodes.setdefault(n, clnode) | 684 fclnode = fclnodes.setdefault(n, clnode) |
677 if clrevorder[clnode] < clrevorder[fclnode]: | 685 if clrevorder[clnode] < clrevorder[fclnode]: |
694 if linkrev in clrevs: | 702 if linkrev in clrevs: |
695 yield filerevlog.node(r), cl.node(linkrev) | 703 yield filerevlog.node(r), cl.node(linkrev) |
696 return dict(genfilenodes()) | 704 return dict(genfilenodes()) |
697 return fnodes.get(fname, {}) | 705 return fnodes.get(fname, {}) |
698 | 706 |
707 changedfiles = set() | |
708 for x in mfchangedfiles.itervalues(): | |
709 changedfiles.update(x) | |
699 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, | 710 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, |
700 source): | 711 source): |
701 yield chunk | 712 yield chunk |
702 | 713 |
703 yield self.close() | 714 yield self.close() |