comparison mercurial/manifest.py @ 24670:dfb86af18a35

treemanifest: optimize treemanifest._walk() to skip directories This makes treemanifest.walk() not visit submanifests that are known not to have any matching files. It does this by calling match.visitdir() on submanifests as it walks. This change also updates largefiles to be able to work with this new behavior in treemanifests. It overrides match.visitdir(), the function that dictates how walk() and matches() skip over directories. The greatest speed improvements are seen with narrower scopes. For example, this commit speeds up the following command on the Mozilla repo from 1.14s to 1.02s: hg files -r . dom/apps/ Whereas with a wider scope, dom/, the speed only improves from 1.21s to 1.13s. As with a similar optimization to treemanifest.matches(), this change will bring out even bigger performance improvements once treemanifests are loaded lazily. Once that happens, we won't just skip over looking at submanifests, but we'll skip even loading them.
author Drew Gottlieb <drgott@google.com>
date Tue, 07 Apr 2015 15:18:52 -0700
parents 19c5b0913960
children aef3d1469773
comparison
equal deleted inserted replaced
24669:fbdbff1b486a 24670:dfb86af18a35
607 607
608 It also reports nonexistent files by marking them bad with match.bad(). 608 It also reports nonexistent files by marking them bad with match.bad().
609 ''' 609 '''
610 fset = set(match.files()) 610 fset = set(match.files())
611 611
612 # avoid the entire walk if we're only looking for specific files
613 if fset and not match.anypats():
614 if util.all(fn in self for fn in fset):
615 for fn in sorted(fset):
616 yield fn
617 raise StopIteration
618
619 for fn in self._walk(match): 612 for fn in self._walk(match):
620 if fn in fset: 613 if fn in fset:
621 # specified pattern is the exact name 614 # specified pattern is the exact name
622 fset.remove(fn) 615 fset.remove(fn)
623 yield fn 616 yield fn
628 621
629 for fn in sorted(fset): 622 for fn in sorted(fset):
630 if not self.hasdir(fn): 623 if not self.hasdir(fn):
631 match.bad(fn, None) 624 match.bad(fn, None)
632 625
633 def _walk(self, match): 626 def _walk(self, match, alldirs=False):
634 '''Recursively generates matching file names for walk().''' 627 '''Recursively generates matching file names for walk().
628
629 Will visit all subdirectories if alldirs is True, otherwise it will
630 only visit subdirectories for which match.visitdir is True.'''
631
632 if not alldirs:
633 # substring to strip trailing slash
634 visit = match.visitdir(self._dir[:-1] or '.')
635 if not visit:
636 return
637 alldirs = (visit == 'all')
635 638
636 # yield this dir's files and walk its submanifests 639 # yield this dir's files and walk its submanifests
637 for p in sorted(self._dirs.keys() + self._files.keys()): 640 for p in sorted(self._dirs.keys() + self._files.keys()):
638 if p in self._files: 641 if p in self._files:
639 fullp = self._subpath(p) 642 fullp = self._subpath(p)
640 if match(fullp): 643 if match(fullp):
641 yield fullp 644 yield fullp
642 else: 645 else:
643 for f in self._dirs[p]._walk(match): 646 for f in self._dirs[p]._walk(match, alldirs):
644 yield f 647 yield f
645 648
646 def matches(self, match): 649 def matches(self, match):
647 '''generate a new manifest filtered by the match argument''' 650 '''generate a new manifest filtered by the match argument'''
648 if match.always(): 651 if match.always():