comparison mercurial/match.py @ 41282:4fab8a7d2d72

match: support rooted globs in hgignore

In a .hgignore, "glob:foo" always means "**/foo". This cannot be avoided because there is no syntax like "^" in regexes to say you don't want the implied "**/" (of course one can use regexes, but glob syntax is nice).

When you have a long list of fairly specific globs like path/to/some/thing, this has two consequences:

1. unintended files may be ignored (not too common though)
2. matching performance can suffer significantly

Here is vanilla hg status timing on a private repository:

Using syntax:glob everywhere
real 0m2.199s
user 0m1.545s
sys 0m0.619s

When rooting the appropriate globs
real 0m1.434s
user 0m0.847s
sys 0m0.565s

(tangentially, none of this shows up in --profile's output. It seems that C code doesn't play well with profiling)

The code already supports this but there is no syntax to make use of it, so it seems reasonable to create such syntax. I create a new hgignore syntax "rootglob".

Differential Revision: https://phab.mercurial-scm.org/D5493
author Valentin Gatien-Baron <vgatien-baron@janestreet.com>
date Thu, 03 Jan 2019 19:02:46 -0500
parents 074c72a38423
children b7a0efb3c370
comparison
equal deleted inserted replaced
41281:183df3df6031 41282:4fab8a7d2d72
23 from .utils import ( 23 from .utils import (
24 stringutil, 24 stringutil,
25 ) 25 )
26 26
27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', 27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
28 'rootglob',
28 'listfile', 'listfile0', 'set', 'include', 'subinclude', 29 'listfile', 'listfile0', 'set', 'include', 'subinclude',
29 'rootfilesin') 30 'rootfilesin')
30 cwdrelativepatternkinds = ('relpath', 'glob') 31 cwdrelativepatternkinds = ('relpath', 'glob')
31 32
32 propertycache = util.propertycache 33 propertycache = util.propertycache
219 normalized and rooted patterns and with listfiles expanded.''' 220 normalized and rooted patterns and with listfiles expanded.'''
220 kindpats = [] 221 kindpats = []
221 for kind, pat in [_patsplit(p, default) for p in patterns]: 222 for kind, pat in [_patsplit(p, default) for p in patterns]:
222 if kind in cwdrelativepatternkinds: 223 if kind in cwdrelativepatternkinds:
223 pat = pathutil.canonpath(root, cwd, pat, auditor) 224 pat = pathutil.canonpath(root, cwd, pat, auditor)
224 elif kind in ('relglob', 'path', 'rootfilesin'): 225 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'):
225 pat = util.normpath(pat) 226 pat = util.normpath(pat)
226 elif kind in ('listfile', 'listfile0'): 227 elif kind in ('listfile', 'listfile0'):
227 try: 228 try:
228 files = util.readfile(pat) 229 files = util.readfile(pat)
229 if kind == 'listfile0': 230 if kind == 'listfile0':
1135 return '(?:|.*/)' + _globre(pat) + globsuffix 1136 return '(?:|.*/)' + _globre(pat) + globsuffix
1136 if kind == 'relre': 1137 if kind == 'relre':
1137 if pat.startswith('^'): 1138 if pat.startswith('^'):
1138 return pat 1139 return pat
1139 return '.*' + pat 1140 return '.*' + pat
1140 if kind == 'glob': 1141 if kind in ('glob', 'rootglob'):
1141 return _globre(pat) + globsuffix 1142 return _globre(pat) + globsuffix
1142 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat)) 1143 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat))
1143 1144
1144 def _buildmatch(kindpats, globsuffix, listsubrepos, root): 1145 def _buildmatch(kindpats, globsuffix, listsubrepos, root):
1145 '''Return regexp string and a matcher function for kindpats. 1146 '''Return regexp string and a matcher function for kindpats.
1250 directories. 1251 directories.
1251 ''' 1252 '''
1252 r = [] 1253 r = []
1253 d = [] 1254 d = []
1254 for kind, pat, source in kindpats: 1255 for kind, pat, source in kindpats:
1255 if kind == 'glob': # find the non-glob prefix 1256 if kind in ('glob', 'rootglob'): # find the non-glob prefix
1256 root = [] 1257 root = []
1257 for p in pat.split('/'): 1258 for p in pat.split('/'):
1258 if '[' in p or '{' in p or '*' in p or '?' in p: 1259 if '[' in p or '{' in p or '*' in p or '?' in p:
1259 break 1260 break
1260 root.append(p) 1261 root.append(p)
1349 1350
1350 syntax: regexp # defaults following lines to non-rooted regexps 1351 syntax: regexp # defaults following lines to non-rooted regexps
1351 syntax: glob # defaults following lines to non-rooted globs 1352 syntax: glob # defaults following lines to non-rooted globs
1352 re:pattern # non-rooted regular expression 1353 re:pattern # non-rooted regular expression
1353 glob:pattern # non-rooted glob 1354 glob:pattern # non-rooted glob
1355 rootglob:pat # rooted glob (same root as ^ in regexps)
1354 pattern # pattern of the current default type 1356 pattern # pattern of the current default type
1355 1357
1356 if sourceinfo is set, returns a list of tuples: 1358 if sourceinfo is set, returns a list of tuples:
1357 (pattern, lineno, originalline). This is useful to debug ignore patterns. 1359 (pattern, lineno, originalline). This is useful to debug ignore patterns.
1358 ''' 1360 '''
1359 1361
1360 syntaxes = { 1362 syntaxes = {
1361 're': 'relre:', 1363 're': 'relre:',
1362 'regexp': 'relre:', 1364 'regexp': 'relre:',
1363 'glob': 'relglob:', 1365 'glob': 'relglob:',
1366 'rootglob': 'rootglob:',
1364 'include': 'include', 1367 'include': 'include',
1365 'subinclude': 'subinclude', 1368 'subinclude': 'subinclude',
1366 } 1369 }
1367 syntax = 'relre:' 1370 syntax = 'relre:'
1368 patterns = [] 1371 patterns = []