Mercurial > hg
comparison mercurial/match.py @ 41282:4fab8a7d2d72
match: support rooted globs in hgignore
In a .hgignore, "glob:foo" always means "**/foo". This cannot be
avoided because there is no syntax like "^" in regexes to say you
don't want the implied "**/" (of course one can use regexes, but glob
syntax is nice).
When you have a long list of fairly specific globs like
path/to/some/thing, this has two consequences:
1. unintended files may be ignored (not too common though)
2. matching performance can suffer significantly
Here is vanilla hg status timing on a private repository:
Using syntax:glob everywhere
real 0m2.199s
user 0m1.545s
sys 0m0.619s
When rooting the appropriate globs
real 0m1.434s
user 0m0.847s
sys 0m0.565s
(Tangentially, none of this shows up in --profile's output; it
seems that C code doesn't play well with profiling.)
The code already supports this but there is no syntax to make use of
it, so it seems reasonable to create such syntax. This change adds a
new hgignore syntax, "rootglob".
Differential Revision: https://phab.mercurial-scm.org/D5493
author | Valentin Gatien-Baron <vgatien-baron@janestreet.com> |
---|---|
date | Thu, 03 Jan 2019 19:02:46 -0500 |
parents | 074c72a38423 |
children | b7a0efb3c370 |
comparison
equal
deleted
inserted
replaced
41281:183df3df6031 | 41282:4fab8a7d2d72 |
---|---|
23 from .utils import ( | 23 from .utils import ( |
24 stringutil, | 24 stringutil, |
25 ) | 25 ) |
26 | 26 |
27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', | 27 allpatternkinds = ('re', 'glob', 'path', 'relglob', 'relpath', 'relre', |
28 'rootglob', | |
28 'listfile', 'listfile0', 'set', 'include', 'subinclude', | 29 'listfile', 'listfile0', 'set', 'include', 'subinclude', |
29 'rootfilesin') | 30 'rootfilesin') |
30 cwdrelativepatternkinds = ('relpath', 'glob') | 31 cwdrelativepatternkinds = ('relpath', 'glob') |
31 | 32 |
32 propertycache = util.propertycache | 33 propertycache = util.propertycache |
219 normalized and rooted patterns and with listfiles expanded.''' | 220 normalized and rooted patterns and with listfiles expanded.''' |
220 kindpats = [] | 221 kindpats = [] |
221 for kind, pat in [_patsplit(p, default) for p in patterns]: | 222 for kind, pat in [_patsplit(p, default) for p in patterns]: |
222 if kind in cwdrelativepatternkinds: | 223 if kind in cwdrelativepatternkinds: |
223 pat = pathutil.canonpath(root, cwd, pat, auditor) | 224 pat = pathutil.canonpath(root, cwd, pat, auditor) |
224 elif kind in ('relglob', 'path', 'rootfilesin'): | 225 elif kind in ('relglob', 'path', 'rootfilesin', 'rootglob'): |
225 pat = util.normpath(pat) | 226 pat = util.normpath(pat) |
226 elif kind in ('listfile', 'listfile0'): | 227 elif kind in ('listfile', 'listfile0'): |
227 try: | 228 try: |
228 files = util.readfile(pat) | 229 files = util.readfile(pat) |
229 if kind == 'listfile0': | 230 if kind == 'listfile0': |
1135 return '(?:|.*/)' + _globre(pat) + globsuffix | 1136 return '(?:|.*/)' + _globre(pat) + globsuffix |
1136 if kind == 'relre': | 1137 if kind == 'relre': |
1137 if pat.startswith('^'): | 1138 if pat.startswith('^'): |
1138 return pat | 1139 return pat |
1139 return '.*' + pat | 1140 return '.*' + pat |
1140 if kind == 'glob': | 1141 if kind in ('glob', 'rootglob'): |
1141 return _globre(pat) + globsuffix | 1142 return _globre(pat) + globsuffix |
1142 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat)) | 1143 raise error.ProgrammingError('not a regex pattern: %s:%s' % (kind, pat)) |
1143 | 1144 |
1144 def _buildmatch(kindpats, globsuffix, listsubrepos, root): | 1145 def _buildmatch(kindpats, globsuffix, listsubrepos, root): |
1145 '''Return regexp string and a matcher function for kindpats. | 1146 '''Return regexp string and a matcher function for kindpats. |
1250 directories. | 1251 directories. |
1251 ''' | 1252 ''' |
1252 r = [] | 1253 r = [] |
1253 d = [] | 1254 d = [] |
1254 for kind, pat, source in kindpats: | 1255 for kind, pat, source in kindpats: |
1255 if kind == 'glob': # find the non-glob prefix | 1256 if kind in ('glob', 'rootglob'): # find the non-glob prefix |
1256 root = [] | 1257 root = [] |
1257 for p in pat.split('/'): | 1258 for p in pat.split('/'): |
1258 if '[' in p or '{' in p or '*' in p or '?' in p: | 1259 if '[' in p or '{' in p or '*' in p or '?' in p: |
1259 break | 1260 break |
1260 root.append(p) | 1261 root.append(p) |
1349 | 1350 |
1350 syntax: regexp # defaults following lines to non-rooted regexps | 1351 syntax: regexp # defaults following lines to non-rooted regexps |
1351 syntax: glob # defaults following lines to non-rooted globs | 1352 syntax: glob # defaults following lines to non-rooted globs |
1352 re:pattern # non-rooted regular expression | 1353 re:pattern # non-rooted regular expression |
1353 glob:pattern # non-rooted glob | 1354 glob:pattern # non-rooted glob |
1355 rootglob:pat # rooted glob (same root as ^ in regexps) | |
1354 pattern # pattern of the current default type | 1356 pattern # pattern of the current default type |
1355 | 1357 |
1356 if sourceinfo is set, returns a list of tuples: | 1358 if sourceinfo is set, returns a list of tuples: |
1357 (pattern, lineno, originalline). This is useful to debug ignore patterns. | 1359 (pattern, lineno, originalline). This is useful to debug ignore patterns. |
1358 ''' | 1360 ''' |
1359 | 1361 |
1360 syntaxes = { | 1362 syntaxes = { |
1361 're': 'relre:', | 1363 're': 'relre:', |
1362 'regexp': 'relre:', | 1364 'regexp': 'relre:', |
1363 'glob': 'relglob:', | 1365 'glob': 'relglob:', |
1366 'rootglob': 'rootglob:', | |
1364 'include': 'include', | 1367 'include': 'include', |
1365 'subinclude': 'subinclude', | 1368 'subinclude': 'subinclude', |
1366 } | 1369 } |
1367 syntax = 'relre:' | 1370 syntax = 'relre:' |
1368 patterns = [] | 1371 patterns = [] |