comparison mercurial/changegroup.py @ 27237:c08814b48ae5

changegroup: avoid iterating the whole manifest. The old code gathered the list of all files that changed anywhere in history, and then collected the changed file nodes by walking the entirety of each manifest to be sent. That's going to be unfortunate for narrowhg, and it's already inefficient for medium-to-large repositories. Timings for bundle --all on my hg repo, tested with hgperf: Before: ! wall 23.442445 comb 23.440000 user 23.250000 sys 0.190000 (best of 3) After: ! wall 20.272187 comb 20.270000 user 20.190000 sys 0.080000 (best of 3)
author Augie Fackler <augie@google.com>
date Fri, 04 Dec 2015 10:34:58 -0500
parents beb60a898dd0
children c3dc03109401
comparison
equal deleted inserted replaced
27236:b0d90fef16b6 27237:c08814b48ae5
611 ml = repo.manifest 611 ml = repo.manifest
612 612
613 clrevorder = {} 613 clrevorder = {}
614 mfs = {} # needed manifests 614 mfs = {} # needed manifests
615 fnodes = {} # needed file nodes 615 fnodes = {} # needed file nodes
616 changedfiles = set() 616 # maps manifest node id -> set(changed files)
617 mfchangedfiles = {}
617 618
618 # Callback for the changelog, used to collect changed files and manifest 619 # Callback for the changelog, used to collect changed files and manifest
619 # nodes. 620 # nodes.
620 # Returns the linkrev node (identity in the changelog case). 621 # Returns the linkrev node (identity in the changelog case).
621 def lookupcl(x): 622 def lookupcl(x):
622 c = cl.read(x) 623 c = cl.read(x)
623 clrevorder[x] = len(clrevorder) 624 clrevorder[x] = len(clrevorder)
624 changedfiles.update(c[3]) 625 n = c[0]
625 # record the first changeset introducing this manifest version 626 # record the first changeset introducing this manifest version
626 mfs.setdefault(c[0], x) 627 mfs.setdefault(n, x)
628 # Record a complete list of potentially-changed files in
629 # this manifest.
630 mfchangedfiles.setdefault(n, set()).update(c[3])
627 return x 631 return x
628 632
629 self._verbosenote(_('uncompressed size of bundle content:\n')) 633 self._verbosenote(_('uncompressed size of bundle content:\n'))
630 size = 0 634 size = 0
631 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')): 635 for chunk in self.group(clnodes, cl, lookupcl, units=_('changesets')):
666 after manifests have been sent to the client. 670 after manifests have been sent to the client.
667 """ 671 """
668 clnode = mfs[x] 672 clnode = mfs[x]
669 if not fastpathlinkrev: 673 if not fastpathlinkrev:
670 mdata = ml.readfast(x) 674 mdata = ml.readfast(x)
671 for f, n in mdata.iteritems(): 675 for f in mfchangedfiles[x]:
672 if f in changedfiles: 676 if True:
677 try:
678 n = mdata[f]
679 except KeyError:
680 continue
673 # record the first changeset introducing this filelog 681 # record the first changeset introducing this filelog
674 # version 682 # version
675 fclnodes = fnodes.setdefault(f, {}) 683 fclnodes = fnodes.setdefault(f, {})
676 fclnode = fclnodes.setdefault(n, clnode) 684 fclnode = fclnodes.setdefault(n, clnode)
677 if clrevorder[clnode] < clrevorder[fclnode]: 685 if clrevorder[clnode] < clrevorder[fclnode]:
694 if linkrev in clrevs: 702 if linkrev in clrevs:
695 yield filerevlog.node(r), cl.node(linkrev) 703 yield filerevlog.node(r), cl.node(linkrev)
696 return dict(genfilenodes()) 704 return dict(genfilenodes())
697 return fnodes.get(fname, {}) 705 return fnodes.get(fname, {})
698 706
707 changedfiles = set()
708 for x in mfchangedfiles.itervalues():
709 changedfiles.update(x)
699 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs, 710 for chunk in self.generatefiles(changedfiles, linknodes, commonrevs,
700 source): 711 source):
701 yield chunk 712 yield chunk
702 713
703 yield self.close() 714 yield self.close()