comparison mercurial/match.py @ 32176:cf042543afa2

match: optimize visitdir() for patterns matching only root directory

_rootsanddirs() returns a list of directories to visit recursively and a list of directories to visit non-recursively. For patterns such as 'rootfilesin:foo/bar', we clearly need to visit the directory foo/bar, but we also need to visit its parents. The method therefore uses util.dirs() to find the parent directories of 'foo/bar'. That method does not include the root directory, but since we obviously need to visit the root directory, we always added '.' to the set of directories to visit non-recursively. The visitdir() method had special handling to consider set(['.']) to mean that no includes had been specified and would thus visit all directories. However, when the pattern is 'rootfilesin:.', set(['.']) is actually the real set of directories to visit and the special handling of that set meant that all directories got visited instead of just the root directory. The fix is simple: add '.' to the set of parent directories in _rootsanddirs() and stop treating set(['.']) specially. This makes hg files -r . -I rootfilesin:. in a treemanifest version of the Firefox repo go from 1.5s to 0.26s on warm disk (and a *much* bigger improvement on cold disk). Note that the -I is necessary for no good reason. We just haven't optimized visitdir() for regular (non-include, non-exclude) patterns yet.
author Martin von Zweigbergk <martinvonz@google.com>
date Fri, 05 May 2017 08:49:07 -0700
parents 6dea1701f170
children bd872f64a8ba
comparison
equal deleted inserted replaced
32175:456b4a32d75f 32176:cf042543afa2
132 132
133 # roots are directories which are recursively included/excluded. 133 # roots are directories which are recursively included/excluded.
134 self._includeroots = set() 134 self._includeroots = set()
135 self._excluderoots = set() 135 self._excluderoots = set()
136 # dirs are directories which are non-recursively included. 136 # dirs are directories which are non-recursively included.
137 self._includedirs = set(['.']) 137 self._includedirs = set()
138 138
139 if badfn is not None: 139 if badfn is not None:
140 self.bad = badfn 140 self.bad = badfn
141 141
142 matchfns = [] 142 matchfns = []
252 ''' 252 '''
253 if self.prefix() and dir in self._fileroots: 253 if self.prefix() and dir in self._fileroots:
254 return 'all' 254 return 'all'
255 if dir in self._excluderoots: 255 if dir in self._excluderoots:
256 return False 256 return False
257 if ((self._includeroots or self._includedirs != set(['.'])) and 257 if ((self._includeroots or self._includedirs) and
258 '.' not in self._includeroots and 258 '.' not in self._includeroots and
259 dir not in self._includeroots and 259 dir not in self._includeroots and
260 dir not in self._includedirs and 260 dir not in self._includedirs and
261 not any(parent in self._includeroots 261 not any(parent in self._includeroots
262 for parent in util.finddirs(dir))): 262 for parent in util.finddirs(dir))):
682 include directories that need to be implicitly considered as either, such as 682 include directories that need to be implicitly considered as either, such as
683 parent directories. 683 parent directories.
684 684
685 >>> _rootsanddirs(\ 685 >>> _rootsanddirs(\
686 [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')]) 686 [('glob', 'g/h/*', ''), ('glob', 'g/h', ''), ('glob', 'g*', '')])
687 (['g/h', 'g/h', '.'], ['g']) 687 (['g/h', 'g/h', '.'], ['g', '.'])
688 >>> _rootsanddirs(\ 688 >>> _rootsanddirs(\
689 [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')]) 689 [('rootfilesin', 'g/h', ''), ('rootfilesin', '', '')])
690 ([], ['g/h', '.', 'g']) 690 ([], ['g/h', '.', 'g', '.'])
691 >>> _rootsanddirs(\ 691 >>> _rootsanddirs(\
692 [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')]) 692 [('relpath', 'r', ''), ('path', 'p/p', ''), ('path', '', '')])
693 (['r', 'p/p', '.'], ['p']) 693 (['r', 'p/p', '.'], ['p', '.'])
694 >>> _rootsanddirs(\ 694 >>> _rootsanddirs(\
695 [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')]) 695 [('relglob', 'rg*', ''), ('re', 're/', ''), ('relre', 'rr', '')])
696 (['.', '.', '.'], []) 696 (['.', '.', '.'], ['.'])
697 ''' 697 '''
698 r, d = _patternrootsanddirs(kindpats) 698 r, d = _patternrootsanddirs(kindpats)
699 699
700 # Append the parents as non-recursive/exact directories, since they must be 700 # Append the parents as non-recursive/exact directories, since they must be
701 # scanned to get to either the roots or the other exact directories. 701 # scanned to get to either the roots or the other exact directories.
702 d.extend(util.dirs(d)) 702 d.extend(util.dirs(d))
703 d.extend(util.dirs(r)) 703 d.extend(util.dirs(r))
704 # util.dirs() does not include the root directory, so add it manually
705 d.append('.')
704 706
705 return r, d 707 return r, d
706 708
707 def _explicitfiles(kindpats): 709 def _explicitfiles(kindpats):
708 '''Returns the potential explicit filenames from the patterns. 710 '''Returns the potential explicit filenames from the patterns.