16 parser, |
16 parser, |
17 pycompat, |
17 pycompat, |
18 smartset, |
18 smartset, |
19 util, |
19 util, |
20 ) |
20 ) |
21 from .utils import ( |
21 from .utils import stringutil |
22 stringutil, |
|
23 ) |
|
24 |
22 |
25 elements = { |
23 elements = { |
26 # token-type: binding-strength, primary, prefix, infix, suffix |
24 # token-type: binding-strength, primary, prefix, infix, suffix |
27 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), |
25 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), |
28 "[": (21, None, None, ("subscript", 1, "]"), None), |
26 "[": (21, None, None, ("subscript", 1, "]"), None), |
29 "#": (21, None, None, ("relation", 21), None), |
27 "#": (21, None, None, ("relation", 21), None), |
30 "##": (20, None, None, ("_concat", 20), None), |
28 "##": (20, None, None, ("_concat", 20), None), |
31 "~": (18, None, None, ("ancestor", 18), None), |
29 "~": (18, None, None, ("ancestor", 18), None), |
32 "^": (18, None, None, ("parent", 18), "parentpost"), |
30 "^": (18, None, None, ("parent", 18), "parentpost"), |
33 "-": (5, None, ("negate", 19), ("minus", 5), None), |
31 "-": (5, None, ("negate", 19), ("minus", 5), None), |
34 "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17), |
32 "::": ( |
35 "dagrangepost"), |
33 17, |
36 "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17), |
34 "dagrangeall", |
37 "dagrangepost"), |
35 ("dagrangepre", 17), |
|
36 ("dagrange", 17), |
|
37 "dagrangepost", |
|
38 ), |
|
39 "..": ( |
|
40 17, |
|
41 "dagrangeall", |
|
42 ("dagrangepre", 17), |
|
43 ("dagrange", 17), |
|
44 "dagrangepost", |
|
45 ), |
38 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"), |
46 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"), |
39 "not": (10, None, ("not", 10), None, None), |
47 "not": (10, None, ("not", 10), None, None), |
40 "!": (10, None, ("not", 10), None, None), |
48 "!": (10, None, ("not", 10), None, None), |
41 "and": (5, None, None, ("and", 5), None), |
49 "and": (5, None, None, ("and", 5), None), |
42 "&": (5, None, None, ("and", 5), None), |
50 "&": (5, None, None, ("and", 5), None), |
59 |
67 |
60 _quoteletters = {'"', "'"} |
68 _quoteletters = {'"', "'"} |
61 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%")) |
69 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%")) |
62 |
70 |
63 # default set of valid characters for the initial letter of symbols |
71 # default set of valid characters for the initial letter of symbols |
64 _syminitletters = set(pycompat.iterbytestr( |
72 _syminitletters = set( |
65 pycompat.sysbytes(string.ascii_letters) + |
73 pycompat.iterbytestr( |
66 pycompat.sysbytes(string.digits) + |
74 pycompat.sysbytes(string.ascii_letters) |
67 '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256))) |
75 + pycompat.sysbytes(string.digits) |
|
76 + '._@' |
|
77 ) |
|
78 ) | set(map(pycompat.bytechr, pycompat.xrange(128, 256))) |
68 |
79 |
69 # default set of valid characters for non-initial letters of symbols |
80 # default set of valid characters for non-initial letters of symbols |
70 _symletters = _syminitletters | set(pycompat.iterbytestr('-/')) |
81 _symletters = _syminitletters | set(pycompat.iterbytestr('-/')) |
|
82 |
71 |
83 |
72 def tokenize(program, lookup=None, syminitletters=None, symletters=None): |
84 def tokenize(program, lookup=None, syminitletters=None, symletters=None): |
73 ''' |
85 ''' |
74 Parse a revset statement into a stream of tokens |
86 Parse a revset statement into a stream of tokens |
75 |
87 |
89 >>> list(tokenize(b"@::")) |
101 >>> list(tokenize(b"@::")) |
90 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)] |
102 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)] |
91 |
103 |
92 ''' |
104 ''' |
93 if not isinstance(program, bytes): |
105 if not isinstance(program, bytes): |
94 raise error.ProgrammingError('revset statement must be bytes, got %r' |
106 raise error.ProgrammingError( |
95 % program) |
107 'revset statement must be bytes, got %r' % program |
|
108 ) |
96 program = pycompat.bytestr(program) |
109 program = pycompat.bytestr(program) |
97 if syminitletters is None: |
110 if syminitletters is None: |
98 syminitletters = _syminitletters |
111 syminitletters = _syminitletters |
99 if symletters is None: |
112 if symletters is None: |
100 symletters = _symletters |
113 symletters = _symletters |
115 return |
128 return |
116 |
129 |
117 pos, l = 0, len(program) |
130 pos, l = 0, len(program) |
118 while pos < l: |
131 while pos < l: |
119 c = program[pos] |
132 c = program[pos] |
120 if c.isspace(): # skip inter-token whitespace |
133 if c.isspace(): # skip inter-token whitespace |
121 pass |
134 pass |
122 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully |
135 elif ( |
|
136 c == ':' and program[pos : pos + 2] == '::' |
|
137 ): # look ahead carefully |
123 yield ('::', None, pos) |
138 yield ('::', None, pos) |
124 pos += 1 # skip ahead |
139 pos += 1 # skip ahead |
125 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully |
140 elif ( |
|
141 c == '.' and program[pos : pos + 2] == '..' |
|
142 ): # look ahead carefully |
126 yield ('..', None, pos) |
143 yield ('..', None, pos) |
127 pos += 1 # skip ahead |
144 pos += 1 # skip ahead |
128 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully |
145 elif ( |
|
146 c == '#' and program[pos : pos + 2] == '##' |
|
147 ): # look ahead carefully |
129 yield ('##', None, pos) |
148 yield ('##', None, pos) |
130 pos += 1 # skip ahead |
149 pos += 1 # skip ahead |
131 elif c in _simpleopletters: # handle simple operators |
150 elif c in _simpleopletters: # handle simple operators |
132 yield (c, None, pos) |
151 yield (c, None, pos) |
133 elif (c in _quoteletters or c == 'r' and |
152 elif ( |
134 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings |
153 c in _quoteletters |
|
154 or c == 'r' |
|
155 and program[pos : pos + 2] in ("r'", 'r"') |
|
156 ): # handle quoted strings |
135 if c == 'r': |
157 if c == 'r': |
136 pos += 1 |
158 pos += 1 |
137 c = program[pos] |
159 c = program[pos] |
138 decode = lambda x: x |
160 decode = lambda x: x |
139 else: |
161 else: |
140 decode = parser.unescapestr |
162 decode = parser.unescapestr |
141 pos += 1 |
163 pos += 1 |
142 s = pos |
164 s = pos |
143 while pos < l: # find closing quote |
165 while pos < l: # find closing quote |
144 d = program[pos] |
166 d = program[pos] |
145 if d == '\\': # skip over escaped characters |
167 if d == '\\': # skip over escaped characters |
146 pos += 2 |
168 pos += 2 |
147 continue |
169 continue |
148 if d == c: |
170 if d == c: |
149 yield ('string', decode(program[s:pos]), s) |
171 yield ('string', decode(program[s:pos]), s) |
150 break |
172 break |
153 raise error.ParseError(_("unterminated string"), s) |
175 raise error.ParseError(_("unterminated string"), s) |
154 # gather up a symbol/keyword |
176 # gather up a symbol/keyword |
155 elif c in syminitletters: |
177 elif c in syminitletters: |
156 s = pos |
178 s = pos |
157 pos += 1 |
179 pos += 1 |
158 while pos < l: # find end of symbol |
180 while pos < l: # find end of symbol |
159 d = program[pos] |
181 d = program[pos] |
160 if d not in symletters: |
182 if d not in symletters: |
161 break |
183 break |
162 if d == '.' and program[pos - 1] == '.': # special case for .. |
184 if d == '.' and program[pos - 1] == '.': # special case for .. |
163 pos -= 1 |
185 pos -= 1 |
164 break |
186 break |
165 pos += 1 |
187 pos += 1 |
166 sym = program[s:pos] |
188 sym = program[s:pos] |
167 if sym in keywords: # operator keywords |
189 if sym in keywords: # operator keywords |
168 yield (sym, None, s) |
190 yield (sym, None, s) |
169 elif '-' in sym: |
191 elif '-' in sym: |
170 # some jerk gave us foo-bar-baz, try to check if it's a symbol |
192 # some jerk gave us foo-bar-baz, try to check if it's a symbol |
171 if lookup and lookup(sym): |
193 if lookup and lookup(sym): |
172 # looks like a real symbol |
194 # looks like a real symbol |
173 yield ('symbol', sym, s) |
195 yield ('symbol', sym, s) |
174 else: |
196 else: |
175 # looks like an expression |
197 # looks like an expression |
176 parts = sym.split('-') |
198 parts = sym.split('-') |
177 for p in parts[:-1]: |
199 for p in parts[:-1]: |
178 if p: # possible consecutive - |
200 if p: # possible consecutive - |
179 yield ('symbol', p, s) |
201 yield ('symbol', p, s) |
180 s += len(p) |
202 s += len(p) |
181 yield ('-', None, s) |
203 yield ('-', None, s) |
182 s += 1 |
204 s += 1 |
183 if parts[-1]: # possible trailing - |
205 if parts[-1]: # possible trailing - |
184 yield ('symbol', parts[-1], s) |
206 yield ('symbol', parts[-1], s) |
185 else: |
207 else: |
186 yield ('symbol', sym, s) |
208 yield ('symbol', sym, s) |
187 pos -= 1 |
209 pos -= 1 |
188 else: |
210 else: |
189 raise error.ParseError(_("syntax error in revset '%s'") % |
211 raise error.ParseError( |
190 program, pos) |
212 _("syntax error in revset '%s'") % program, pos |
|
213 ) |
191 pos += 1 |
214 pos += 1 |
192 yield ('end', None, pos) |
215 yield ('end', None, pos) |
193 |
216 |
|
217 |
194 # helpers |
218 # helpers |
195 |
219 |
196 _notset = object() |
220 _notset = object() |
|
221 |
197 |
222 |
198 def getsymbol(x): |
223 def getsymbol(x): |
199 if x and x[0] == 'symbol': |
224 if x and x[0] == 'symbol': |
200 return x[1] |
225 return x[1] |
201 raise error.ParseError(_('not a symbol')) |
226 raise error.ParseError(_('not a symbol')) |
202 |
227 |
|
228 |
203 def getstring(x, err): |
229 def getstring(x, err): |
204 if x and (x[0] == 'string' or x[0] == 'symbol'): |
230 if x and (x[0] == 'string' or x[0] == 'symbol'): |
205 return x[1] |
231 return x[1] |
206 raise error.ParseError(err) |
232 raise error.ParseError(err) |
|
233 |
207 |
234 |
208 def getinteger(x, err, default=_notset): |
235 def getinteger(x, err, default=_notset): |
209 if not x and default is not _notset: |
236 if not x and default is not _notset: |
210 return default |
237 return default |
211 try: |
238 try: |
212 return int(getstring(x, err)) |
239 return int(getstring(x, err)) |
213 except ValueError: |
240 except ValueError: |
214 raise error.ParseError(err) |
241 raise error.ParseError(err) |
215 |
242 |
|
243 |
216 def getboolean(x, err): |
244 def getboolean(x, err): |
217 value = stringutil.parsebool(getsymbol(x)) |
245 value = stringutil.parsebool(getsymbol(x)) |
218 if value is not None: |
246 if value is not None: |
219 return value |
247 return value |
220 raise error.ParseError(err) |
248 raise error.ParseError(err) |
|
249 |
221 |
250 |
222 def getlist(x): |
251 def getlist(x): |
223 if not x: |
252 if not x: |
224 return [] |
253 return [] |
225 if x[0] == 'list': |
254 if x[0] == 'list': |
226 return list(x[1:]) |
255 return list(x[1:]) |
227 return [x] |
256 return [x] |
|
257 |
228 |
258 |
229 def getrange(x, err): |
259 def getrange(x, err): |
230 if not x: |
260 if not x: |
231 raise error.ParseError(err) |
261 raise error.ParseError(err) |
232 op = x[0] |
262 op = x[0] |
250 n = getinteger(x, err1) |
281 n = getinteger(x, err1) |
251 return n, n |
282 return n, n |
252 a, b = getrange(x, err1) |
283 a, b = getrange(x, err1) |
253 return getinteger(a, err2, deffirst), getinteger(b, err2, deflast) |
284 return getinteger(a, err2, deffirst), getinteger(b, err2, deflast) |
254 |
285 |
|
286 |
255 def getargs(x, min, max, err): |
287 def getargs(x, min, max, err): |
256 l = getlist(x) |
288 l = getlist(x) |
257 if len(l) < min or (max >= 0 and len(l) > max): |
289 if len(l) < min or (max >= 0 and len(l) > max): |
258 raise error.ParseError(err) |
290 raise error.ParseError(err) |
259 return l |
291 return l |
260 |
292 |
|
293 |
261 def getargsdict(x, funcname, keys): |
294 def getargsdict(x, funcname, keys): |
262 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys), |
295 return parser.buildargsdict( |
263 keyvaluenode='keyvalue', keynode='symbol') |
296 getlist(x), |
|
297 funcname, |
|
298 parser.splitargspec(keys), |
|
299 keyvaluenode='keyvalue', |
|
300 keynode='symbol', |
|
301 ) |
|
302 |
264 |
303 |
265 # cache of {spec: raw parsed tree} built internally |
304 # cache of {spec: raw parsed tree} built internally |
266 _treecache = {} |
305 _treecache = {} |
|
306 |
267 |
307 |
268 def _cachedtree(spec): |
308 def _cachedtree(spec): |
269 # thread safe because parse() is reentrant and dict.__setitem__() is atomic |
309 # thread safe because parse() is reentrant and dict.__setitem__() is atomic |
270 tree = _treecache.get(spec) |
310 tree = _treecache.get(spec) |
271 if tree is None: |
311 if tree is None: |
272 _treecache[spec] = tree = parse(spec) |
312 _treecache[spec] = tree = parse(spec) |
273 return tree |
313 return tree |
274 |
314 |
|
315 |
275 def _build(tmplspec, *repls): |
316 def _build(tmplspec, *repls): |
276 """Create raw parsed tree from a template revset statement |
317 """Create raw parsed tree from a template revset statement |
277 |
318 |
278 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2')) |
319 >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2')) |
279 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2')) |
320 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2')) |
280 """ |
321 """ |
281 template = _cachedtree(tmplspec) |
322 template = _cachedtree(tmplspec) |
282 return parser.buildtree(template, ('symbol', '_'), *repls) |
323 return parser.buildtree(template, ('symbol', '_'), *repls) |
|
324 |
283 |
325 |
284 def _match(patspec, tree): |
326 def _match(patspec, tree): |
285 """Test if a tree matches the given pattern statement; return the matches |
327 """Test if a tree matches the given pattern statement; return the matches |
286 |
328 |
287 >>> _match(b'f(_)', parse(b'f()')) |
329 >>> _match(b'f(_)', parse(b'f()')) |
288 >>> _match(b'f(_)', parse(b'f(1)')) |
330 >>> _match(b'f(_)', parse(b'f(1)')) |
289 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')] |
331 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')] |
290 >>> _match(b'f(_)', parse(b'f(1, 2)')) |
332 >>> _match(b'f(_)', parse(b'f(1, 2)')) |
291 """ |
333 """ |
292 pattern = _cachedtree(patspec) |
334 pattern = _cachedtree(patspec) |
293 return parser.matchtree(pattern, tree, ('symbol', '_'), |
335 return parser.matchtree( |
294 {'keyvalue', 'list'}) |
336 pattern, tree, ('symbol', '_'), {'keyvalue', 'list'} |
|
337 ) |
|
338 |
295 |
339 |
296 def _matchonly(revs, bases): |
340 def _matchonly(revs, bases): |
297 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases)) |
341 return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases)) |
|
342 |
298 |
343 |
299 def _fixops(x): |
344 def _fixops(x): |
300 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be |
345 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be |
301 handled well by our simple top-down parser""" |
346 handled well by our simple top-down parser""" |
302 if not isinstance(x, tuple): |
347 if not isinstance(x, tuple): |
369 return (op, x[1], _analyze(x[2])) |
422 return (op, x[1], _analyze(x[2])) |
370 elif op == 'func': |
423 elif op == 'func': |
371 return (op, x[1], _analyze(x[2])) |
424 return (op, x[1], _analyze(x[2])) |
372 raise ValueError('invalid operator %r' % op) |
425 raise ValueError('invalid operator %r' % op) |
373 |
426 |
|
427 |
374 def analyze(x): |
428 def analyze(x): |
375 """Transform raw parsed tree to evaluatable tree which can be fed to |
429 """Transform raw parsed tree to evaluatable tree which can be fed to |
376 optimize() or getset() |
430 optimize() or getset() |
377 |
431 |
378 All pseudo operations should be mapped to real operations or functions |
432 All pseudo operations should be mapped to real operations or functions |
379 defined in methods or symbols table respectively. |
433 defined in methods or symbols table respectively. |
380 """ |
434 """ |
381 return _analyze(x) |
435 return _analyze(x) |
382 |
436 |
|
437 |
383 def _optimize(x): |
438 def _optimize(x): |
384 if x is None: |
439 if x is None: |
385 return 0, x |
440 return 0, x |
386 |
441 |
387 op = x[0] |
442 op = x[0] |
388 if op in ('string', 'symbol', 'smartset'): |
443 if op in ('string', 'symbol', 'smartset'): |
389 return 0.5, x # single revisions are small |
444 return 0.5, x # single revisions are small |
390 elif op == 'and': |
445 elif op == 'and': |
391 wa, ta = _optimize(x[1]) |
446 wa, ta = _optimize(x[1]) |
392 wb, tb = _optimize(x[2]) |
447 wb, tb = _optimize(x[2]) |
393 w = min(wa, wb) |
448 w = min(wa, wb) |
394 |
449 |
476 return w + wa, _build('_commonancestorheads(_)', m[1]) |
533 return w + wa, _build('_commonancestorheads(_)', m[1]) |
477 |
534 |
478 return w + wa, (op, x[1], ta) |
535 return w + wa, (op, x[1], ta) |
479 raise ValueError('invalid operator %r' % op) |
536 raise ValueError('invalid operator %r' % op) |
480 |
537 |
|
538 |
481 def optimize(tree): |
539 def optimize(tree): |
482 """Optimize evaluatable tree |
540 """Optimize evaluatable tree |
483 |
541 |
484 All pseudo operations should be transformed beforehand. |
542 All pseudo operations should be transformed beforehand. |
485 """ |
543 """ |
486 _weight, newtree = _optimize(tree) |
544 _weight, newtree = _optimize(tree) |
487 return newtree |
545 return newtree |
488 |
546 |
|
547 |
489 # the set of valid characters for the initial letter of symbols in |
548 # the set of valid characters for the initial letter of symbols in |
490 # alias declarations and definitions |
549 # alias declarations and definitions |
491 _aliassyminitletters = _syminitletters | {'$'} |
550 _aliassyminitletters = _syminitletters | {'$'} |
|
551 |
492 |
552 |
493 def _parsewith(spec, lookup=None, syminitletters=None): |
553 def _parsewith(spec, lookup=None, syminitletters=None): |
494 """Generate a parse tree of given spec with given tokenizing options |
554 """Generate a parse tree of given spec with given tokenizing options |
495 |
555 |
496 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters) |
556 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters) |
505 ParseError: ('invalid token', 4) |
565 ParseError: ('invalid token', 4) |
506 """ |
566 """ |
507 if lookup and spec.startswith('revset(') and spec.endswith(')'): |
567 if lookup and spec.startswith('revset(') and spec.endswith(')'): |
508 lookup = None |
568 lookup = None |
509 p = parser.parser(elements) |
569 p = parser.parser(elements) |
510 tree, pos = p.parse(tokenize(spec, lookup=lookup, |
570 tree, pos = p.parse( |
511 syminitletters=syminitletters)) |
571 tokenize(spec, lookup=lookup, syminitletters=syminitletters) |
|
572 ) |
512 if pos != len(spec): |
573 if pos != len(spec): |
513 raise error.ParseError(_('invalid token'), pos) |
574 raise error.ParseError(_('invalid token'), pos) |
514 return _fixops(parser.simplifyinfixops(tree, ('list', 'or'))) |
575 return _fixops(parser.simplifyinfixops(tree, ('list', 'or'))) |
515 |
576 |
|
577 |
516 class _aliasrules(parser.basealiasrules): |
578 class _aliasrules(parser.basealiasrules): |
517 """Parsing and expansion rule set of revset aliases""" |
579 """Parsing and expansion rule set of revset aliases""" |
|
580 |
518 _section = _('revset alias') |
581 _section = _('revset alias') |
519 |
582 |
520 @staticmethod |
583 @staticmethod |
521 def _parse(spec): |
584 def _parse(spec): |
522 """Parse alias declaration/definition ``spec`` |
585 """Parse alias declaration/definition ``spec`` |
644 return _formatargtype('d', data[0]) |
717 return _formatargtype('d', data[0]) |
645 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data) |
718 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data) |
646 except (TypeError, ValueError): |
719 except (TypeError, ValueError): |
647 raise error.ParseError(_('invalid argument for revspec')) |
720 raise error.ParseError(_('invalid argument for revspec')) |
648 |
721 |
|
722 |
649 def _formatparamexp(args, t): |
723 def _formatparamexp(args, t): |
650 return ', '.join(_formatargtype(t, a) for a in args) |
724 return ', '.join(_formatargtype(t, a) for a in args) |
|
725 |
651 |
726 |
652 _formatlistfuncs = { |
727 _formatlistfuncs = { |
653 'l': _formatlistexp, |
728 'l': _formatlistexp, |
654 'p': _formatparamexp, |
729 'p': _formatparamexp, |
655 } |
730 } |
|
731 |
656 |
732 |
657 def formatspec(expr, *args): |
733 def formatspec(expr, *args): |
658 ''' |
734 ''' |
659 This is a convenience function for using revsets internally, and |
735 This is a convenience function for using revsets internally, and |
660 escapes arguments appropriately. Aliases are intentionally ignored |
736 escapes arguments appropriately. Aliases are intentionally ignored |