comparison mercurial/match.py @ 38954:5a7df82de142

includematcher: separate "parents" from "dirs"

A future patch will make use of this separation so that we can make more intelligent decisions about what to investigate/load when the matcher is in use. Currently, even with this patch, we typically use the 'visitdir' call to identify if we can skip some directory, something along the lines of:

    for f in all_items:
        if match.visitdir(f):
            <do stuff>

This can be slower than we'd like if there are a lot of items; it requires N calls to match.visitdir in the best case. Commonly, especially with 'narrow', we have a situation where we do some work for the directory, possibly just loading it from disk (when using treemanifests) and then check if we should be interacting with it at all, which can be a huge slowdown in some pathological cases.

Differential Revision: https://phab.mercurial-scm.org/D4129
author spectral <spectral@google.com>
date Mon, 06 Aug 2018 12:52:22 -0700
parents a8bfaf592033
children 081cc9a95b65
comparison
equal deleted inserted replaced
38953:987d3a4b989f 38954:5a7df82de142
443 super(includematcher, self).__init__(root, cwd, badfn) 443 super(includematcher, self).__init__(root, cwd, badfn)
444 444
445 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)', 445 self._pats, self.matchfn = _buildmatch(kindpats, '(?:/|$)',
446 listsubrepos, root) 446 listsubrepos, root)
447 self._prefix = _prefix(kindpats) 447 self._prefix = _prefix(kindpats)
448 roots, dirs = _rootsanddirs(kindpats) 448 roots, dirs, parents = _rootsdirsandparents(kindpats)
449 # roots are directories which are recursively included. 449 # roots are directories which are recursively included.
450 self._roots = set(roots) 450 self._roots = set(roots)
451 # dirs are directories which are non-recursively included. 451 # dirs are directories which are non-recursively included.
452 self._dirs = set(dirs) 452 self._dirs = set(dirs)
453 # parents are directories which are non-recursively included because
454 # they are needed to get to items in _dirs or _roots.
455 self._parents = set(parents)
453 456
454 def visitdir(self, dir): 457 def visitdir(self, dir):
455 if self._prefix and dir in self._roots: 458 if self._prefix and dir in self._roots:
456 return 'all' 459 return 'all'
457 return ('.' in self._roots or 460 return ('.' in self._roots or
458 dir in self._roots or 461 dir in self._roots or
459 dir in self._dirs or 462 dir in self._dirs or
463 dir in self._parents or
460 any(parentdir in self._roots 464 any(parentdir in self._roots
461 for parentdir in util.finddirs(dir))) 465 for parentdir in util.finddirs(dir)))
462 466
463 @encoding.strmethod 467 @encoding.strmethod
464 def __repr__(self): 468 def __repr__(self):
1002 def _roots(kindpats): 1006 def _roots(kindpats):
1003 '''Returns root directories to match recursively from the given patterns.''' 1007 '''Returns root directories to match recursively from the given patterns.'''
1004 roots, dirs = _patternrootsanddirs(kindpats) 1008 roots, dirs = _patternrootsanddirs(kindpats)
1005 return roots 1009 return roots
1006 1010
1007 def _rootsanddirs(kindpats): 1011 def _rootsdirsandparents(kindpats):
1008 '''Returns roots and exact directories from patterns. 1012 '''Returns roots and exact directories from patterns.
1009 1013
1010 roots are directories to match recursively, whereas exact directories should 1014 roots are directories to match recursively, whereas exact directories should
1011 be matched non-recursively. The returned (roots, dirs) tuple will also 1015 be matched non-recursively. The returned (roots, dirs) tuple will also
1012 include directories that need to be implicitly considered as either, such as 1016 include directories that need to be implicitly considered as either, such as
1013 parent directories. 1017 parent directories.
1014 1018
1015 >>> _rootsanddirs( 1019 >>> _rootsdirsandparents(
1016 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''), 1020 ... [(b'glob', b'g/h/*', b''), (b'glob', b'g/h', b''),
1017 ... (b'glob', b'g*', b'')]) 1021 ... (b'glob', b'g*', b'')])
1018 (['g/h', 'g/h', '.'], ['g', '.']) 1022 (['g/h', 'g/h', '.'], [], ['g', '.'])
1019 >>> _rootsanddirs( 1023 >>> _rootsdirsandparents(
1020 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')]) 1024 ... [(b'rootfilesin', b'g/h', b''), (b'rootfilesin', b'', b'')])
1021 ([], ['g/h', '.', 'g', '.']) 1025 ([], ['g/h', '.'], ['g', '.'])
1022 >>> _rootsanddirs( 1026 >>> _rootsdirsandparents(
1023 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''), 1027 ... [(b'relpath', b'r', b''), (b'path', b'p/p', b''),
1024 ... (b'path', b'', b'')]) 1028 ... (b'path', b'', b'')])
1025 (['r', 'p/p', '.'], ['p', '.']) 1029 (['r', 'p/p', '.'], [], ['p', '.'])
1026 >>> _rootsanddirs( 1030 >>> _rootsdirsandparents(
1027 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''), 1031 ... [(b'relglob', b'rg*', b''), (b're', b're/', b''),
1028 ... (b'relre', b'rr', b'')]) 1032 ... (b'relre', b'rr', b'')])
1029 (['.', '.', '.'], ['.']) 1033 (['.', '.', '.'], [], ['.'])
1030 ''' 1034 '''
1031 r, d = _patternrootsanddirs(kindpats) 1035 r, d = _patternrootsanddirs(kindpats)
1032 1036
1037 p = []
1033 # Append the parents as non-recursive/exact directories, since they must be 1038 # Append the parents as non-recursive/exact directories, since they must be
1034 # scanned to get to either the roots or the other exact directories. 1039 # scanned to get to either the roots or the other exact directories.
1035 d.extend(util.dirs(d)) 1040 p.extend(util.dirs(d))
1036 d.extend(util.dirs(r)) 1041 p.extend(util.dirs(r))
1037 # util.dirs() does not include the root directory, so add it manually 1042 # util.dirs() does not include the root directory, so add it manually
1038 d.append('.') 1043 p.append('.')
1039 1044
1040 return r, d 1045 return r, d, p
1041 1046
1042 def _explicitfiles(kindpats): 1047 def _explicitfiles(kindpats):
1043 '''Returns the potential explicit filenames from the patterns. 1048 '''Returns the potential explicit filenames from the patterns.
1044 1049
1045 >>> _explicitfiles([(b'path', b'foo/bar', b'')]) 1050 >>> _explicitfiles([(b'path', b'foo/bar', b'')])