Mercurial > hg
comparison mercurial/revsetlang.py @ 31024:0b8356705de6
revset: split language services to revsetlang module (API)
New revsetlang module hosts parser, tokenizer, and miscellaneous functions
working on parsed tree. It does not include functions for evaluation such as
getset() and match().
2288 mercurial/revset.py
684 mercurial/revsetlang.py
2972 total
get*() functions are aliased since they are common in revset.py.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 19 Feb 2017 18:19:33 +0900 |
parents | mercurial/revset.py@17b5cda5a84a |
children | 77270ec0cdd9 |
comparison
equal
deleted
inserted
replaced
31023:aea06029919e | 31024:0b8356705de6 |
---|---|
1 # revsetlang.py - parser, tokenizer and utility for revision set language | |
2 # | |
3 # Copyright 2010 Matt Mackall <mpm@selenic.com> | |
4 # | |
5 # This software may be used and distributed according to the terms of the | |
6 # GNU General Public License version 2 or any later version. | |
7 | |
8 from __future__ import absolute_import | |
9 | |
10 import string | |
11 | |
12 from .i18n import _ | |
13 from . import ( | |
14 error, | |
15 node, | |
16 parser, | |
17 pycompat, | |
18 ) | |
19 | |
20 elements = { | |
21 # token-type: binding-strength, primary, prefix, infix, suffix | |
22 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None), | |
23 "##": (20, None, None, ("_concat", 20), None), | |
24 "~": (18, None, None, ("ancestor", 18), None), | |
25 "^": (18, None, None, ("parent", 18), "parentpost"), | |
26 "-": (5, None, ("negate", 19), ("minus", 5), None), | |
27 "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"), | |
28 "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"), | |
29 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"), | |
30 "not": (10, None, ("not", 10), None, None), | |
31 "!": (10, None, ("not", 10), None, None), | |
32 "and": (5, None, None, ("and", 5), None), | |
33 "&": (5, None, None, ("and", 5), None), | |
34 "%": (5, None, None, ("only", 5), "onlypost"), | |
35 "or": (4, None, None, ("or", 4), None), | |
36 "|": (4, None, None, ("or", 4), None), | |
37 "+": (4, None, None, ("or", 4), None), | |
38 "=": (3, None, None, ("keyvalue", 3), None), | |
39 ",": (2, None, None, ("list", 2), None), | |
40 ")": (0, None, None, None, None), | |
41 "symbol": (0, "symbol", None, None, None), | |
42 "string": (0, "string", None, None, None), | |
43 "end": (0, None, None, None, None), | |
44 } | |
45 | |
46 keywords = set(['and', 'or', 'not']) | |
47 | |
48 # default set of valid characters for the initial letter of symbols | |
49 _syminitletters = set( | |
50 string.ascii_letters + | |
51 string.digits + pycompat.sysstr('._@')) | set(map(chr, xrange(128, 256))) | |
52 | |
53 # default set of valid characters for non-initial letters of symbols | |
54 _symletters = _syminitletters | set(pycompat.sysstr('-/')) | |
55 | |
56 def tokenize(program, lookup=None, syminitletters=None, symletters=None): | |
57 ''' | |
58 Parse a revset statement into a stream of tokens | |
59 | |
60 ``syminitletters`` is the set of valid characters for the initial | |
61 letter of symbols. | |
62 | |
63 By default, character ``c`` is recognized as valid for initial | |
64 letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``. | |
65 | |
66 ``symletters`` is the set of valid characters for non-initial | |
67 letters of symbols. | |
68 | |
69 By default, character ``c`` is recognized as valid for non-initial | |
70 letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``. | |
71 | |
72 Check that @ is a valid unquoted token character (issue3686): | |
73 >>> list(tokenize("@::")) | |
74 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)] | |
75 | |
76 ''' | |
77 if syminitletters is None: | |
78 syminitletters = _syminitletters | |
79 if symletters is None: | |
80 symletters = _symletters | |
81 | |
82 if program and lookup: | |
83 # attempt to parse old-style ranges first to deal with | |
84 # things like old-tag which contain query metacharacters | |
85 parts = program.split(':', 1) | |
86 if all(lookup(sym) for sym in parts if sym): | |
87 if parts[0]: | |
88 yield ('symbol', parts[0], 0) | |
89 if len(parts) > 1: | |
90 s = len(parts[0]) | |
91 yield (':', None, s) | |
92 if parts[1]: | |
93 yield ('symbol', parts[1], s + 1) | |
94 yield ('end', None, len(program)) | |
95 return | |
96 | |
97 pos, l = 0, len(program) | |
98 while pos < l: | |
99 c = program[pos] | |
100 if c.isspace(): # skip inter-token whitespace | |
101 pass | |
102 elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully | |
103 yield ('::', None, pos) | |
104 pos += 1 # skip ahead | |
105 elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully | |
106 yield ('..', None, pos) | |
107 pos += 1 # skip ahead | |
108 elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully | |
109 yield ('##', None, pos) | |
110 pos += 1 # skip ahead | |
111 elif c in "():=,-|&+!~^%": # handle simple operators | |
112 yield (c, None, pos) | |
113 elif (c in '"\'' or c == 'r' and | |
114 program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings | |
115 if c == 'r': | |
116 pos += 1 | |
117 c = program[pos] | |
118 decode = lambda x: x | |
119 else: | |
120 decode = parser.unescapestr | |
121 pos += 1 | |
122 s = pos | |
123 while pos < l: # find closing quote | |
124 d = program[pos] | |
125 if d == '\\': # skip over escaped characters | |
126 pos += 2 | |
127 continue | |
128 if d == c: | |
129 yield ('string', decode(program[s:pos]), s) | |
130 break | |
131 pos += 1 | |
132 else: | |
133 raise error.ParseError(_("unterminated string"), s) | |
134 # gather up a symbol/keyword | |
135 elif c in syminitletters: | |
136 s = pos | |
137 pos += 1 | |
138 while pos < l: # find end of symbol | |
139 d = program[pos] | |
140 if d not in symletters: | |
141 break | |
142 if d == '.' and program[pos - 1] == '.': # special case for .. | |
143 pos -= 1 | |
144 break | |
145 pos += 1 | |
146 sym = program[s:pos] | |
147 if sym in keywords: # operator keywords | |
148 yield (sym, None, s) | |
149 elif '-' in sym: | |
150 # some jerk gave us foo-bar-baz, try to check if it's a symbol | |
151 if lookup and lookup(sym): | |
152 # looks like a real symbol | |
153 yield ('symbol', sym, s) | |
154 else: | |
155 # looks like an expression | |
156 parts = sym.split('-') | |
157 for p in parts[:-1]: | |
158 if p: # possible consecutive - | |
159 yield ('symbol', p, s) | |
160 s += len(p) | |
161 yield ('-', None, pos) | |
162 s += 1 | |
163 if parts[-1]: # possible trailing - | |
164 yield ('symbol', parts[-1], s) | |
165 else: | |
166 yield ('symbol', sym, s) | |
167 pos -= 1 | |
168 else: | |
169 raise error.ParseError(_("syntax error in revset '%s'") % | |
170 program, pos) | |
171 pos += 1 | |
172 yield ('end', None, pos) | |
173 | |
174 # helpers | |
175 | |
176 _notset = object() | |
177 | |
178 def getsymbol(x): | |
179 if x and x[0] == 'symbol': | |
180 return x[1] | |
181 raise error.ParseError(_('not a symbol')) | |
182 | |
183 def getstring(x, err): | |
184 if x and (x[0] == 'string' or x[0] == 'symbol'): | |
185 return x[1] | |
186 raise error.ParseError(err) | |
187 | |
188 def getinteger(x, err, default=_notset): | |
189 if not x and default is not _notset: | |
190 return default | |
191 try: | |
192 return int(getstring(x, err)) | |
193 except ValueError: | |
194 raise error.ParseError(err) | |
195 | |
196 def getlist(x): | |
197 if not x: | |
198 return [] | |
199 if x[0] == 'list': | |
200 return list(x[1:]) | |
201 return [x] | |
202 | |
203 def getrange(x, err): | |
204 if not x: | |
205 raise error.ParseError(err) | |
206 op = x[0] | |
207 if op == 'range': | |
208 return x[1], x[2] | |
209 elif op == 'rangepre': | |
210 return None, x[1] | |
211 elif op == 'rangepost': | |
212 return x[1], None | |
213 elif op == 'rangeall': | |
214 return None, None | |
215 raise error.ParseError(err) | |
216 | |
217 def getargs(x, min, max, err): | |
218 l = getlist(x) | |
219 if len(l) < min or (max >= 0 and len(l) > max): | |
220 raise error.ParseError(err) | |
221 return l | |
222 | |
223 def getargsdict(x, funcname, keys): | |
224 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys), | |
225 keyvaluenode='keyvalue', keynode='symbol') | |
226 | |
227 # Constants for ordering requirement, used in _analyze(): | |
228 # | |
229 # If 'define', any nested functions and operations can change the ordering of | |
230 # the entries in the set. If 'follow', any nested functions and operations | |
231 # should take the ordering specified by the first operand to the '&' operator. | |
232 # | |
233 # For instance, | |
234 # | |
235 # X & (Y | Z) | |
236 # ^ ^^^^^^^ | |
237 # | follow | |
238 # define | |
239 # | |
240 # will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order | |
241 # of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't. | |
242 # | |
243 # 'any' means the order doesn't matter. For instance, | |
244 # | |
245 # X & !Y | |
246 # ^ | |
247 # any | |
248 # | |
249 # 'y()' can either enforce its ordering requirement or take the ordering | |
250 # specified by 'x()' because 'not()' doesn't care the order. | |
251 # | |
252 # Transition of ordering requirement: | |
253 # | |
254 # 1. starts with 'define' | |
255 # 2. shifts to 'follow' by 'x & y' | |
256 # 3. changes back to 'define' on function call 'f(x)' or function-like | |
257 # operation 'x (f) y' because 'f' may have its own ordering requirement | |
258 # for 'x' and 'y' (e.g. 'first(x)') | |
259 # | |
260 anyorder = 'any' # don't care the order | |
261 defineorder = 'define' # should define the order | |
262 followorder = 'follow' # must follow the current order | |
263 | |
264 # transition table for 'x & y', from the current expression 'x' to 'y' | |
265 _tofolloworder = { | |
266 anyorder: anyorder, | |
267 defineorder: followorder, | |
268 followorder: followorder, | |
269 } | |
270 | |
271 def _matchonly(revs, bases): | |
272 """ | |
273 >>> f = lambda *args: _matchonly(*map(parse, args)) | |
274 >>> f('ancestors(A)', 'not ancestors(B)') | |
275 ('list', ('symbol', 'A'), ('symbol', 'B')) | |
276 """ | |
277 if (revs is not None | |
278 and revs[0] == 'func' | |
279 and getsymbol(revs[1]) == 'ancestors' | |
280 and bases is not None | |
281 and bases[0] == 'not' | |
282 and bases[1][0] == 'func' | |
283 and getsymbol(bases[1][1]) == 'ancestors'): | |
284 return ('list', revs[2], bases[1][2]) | |
285 | |
286 def _fixops(x): | |
287 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be | |
288 handled well by our simple top-down parser""" | |
289 if not isinstance(x, tuple): | |
290 return x | |
291 | |
292 op = x[0] | |
293 if op == 'parent': | |
294 # x^:y means (x^) : y, not x ^ (:y) | |
295 # x^: means (x^) :, not x ^ (:) | |
296 post = ('parentpost', x[1]) | |
297 if x[2][0] == 'dagrangepre': | |
298 return _fixops(('dagrange', post, x[2][1])) | |
299 elif x[2][0] == 'rangepre': | |
300 return _fixops(('range', post, x[2][1])) | |
301 elif x[2][0] == 'rangeall': | |
302 return _fixops(('rangepost', post)) | |
303 elif op == 'or': | |
304 # make number of arguments deterministic: | |
305 # x + y + z -> (or x y z) -> (or (list x y z)) | |
306 return (op, _fixops(('list',) + x[1:])) | |
307 | |
308 return (op,) + tuple(_fixops(y) for y in x[1:]) | |
309 | |
310 def _analyze(x, order): | |
311 if x is None: | |
312 return x | |
313 | |
314 op = x[0] | |
315 if op == 'minus': | |
316 return _analyze(('and', x[1], ('not', x[2])), order) | |
317 elif op == 'only': | |
318 t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) | |
319 return _analyze(t, order) | |
320 elif op == 'onlypost': | |
321 return _analyze(('func', ('symbol', 'only'), x[1]), order) | |
322 elif op == 'dagrangepre': | |
323 return _analyze(('func', ('symbol', 'ancestors'), x[1]), order) | |
324 elif op == 'dagrangepost': | |
325 return _analyze(('func', ('symbol', 'descendants'), x[1]), order) | |
326 elif op == 'negate': | |
327 s = getstring(x[1], _("can't negate that")) | |
328 return _analyze(('string', '-' + s), order) | |
329 elif op in ('string', 'symbol'): | |
330 return x | |
331 elif op == 'and': | |
332 ta = _analyze(x[1], order) | |
333 tb = _analyze(x[2], _tofolloworder[order]) | |
334 return (op, ta, tb, order) | |
335 elif op == 'or': | |
336 return (op, _analyze(x[1], order), order) | |
337 elif op == 'not': | |
338 return (op, _analyze(x[1], anyorder), order) | |
339 elif op == 'rangeall': | |
340 return (op, None, order) | |
341 elif op in ('rangepre', 'rangepost', 'parentpost'): | |
342 return (op, _analyze(x[1], defineorder), order) | |
343 elif op == 'group': | |
344 return _analyze(x[1], order) | |
345 elif op in ('dagrange', 'range', 'parent', 'ancestor'): | |
346 ta = _analyze(x[1], defineorder) | |
347 tb = _analyze(x[2], defineorder) | |
348 return (op, ta, tb, order) | |
349 elif op == 'list': | |
350 return (op,) + tuple(_analyze(y, order) for y in x[1:]) | |
351 elif op == 'keyvalue': | |
352 return (op, x[1], _analyze(x[2], order)) | |
353 elif op == 'func': | |
354 f = getsymbol(x[1]) | |
355 d = defineorder | |
356 if f == 'present': | |
357 # 'present(set)' is known to return the argument set with no | |
358 # modification, so forward the current order to its argument | |
359 d = order | |
360 return (op, x[1], _analyze(x[2], d), order) | |
361 raise ValueError('invalid operator %r' % op) | |
362 | |
363 def analyze(x, order=defineorder): | |
364 """Transform raw parsed tree to evaluatable tree which can be fed to | |
365 optimize() or getset() | |
366 | |
367 All pseudo operations should be mapped to real operations or functions | |
368 defined in methods or symbols table respectively. | |
369 | |
370 'order' specifies how the current expression 'x' is ordered (see the | |
371 constants defined above.) | |
372 """ | |
373 return _analyze(x, order) | |
374 | |
375 def _optimize(x, small): | |
376 if x is None: | |
377 return 0, x | |
378 | |
379 smallbonus = 1 | |
380 if small: | |
381 smallbonus = .5 | |
382 | |
383 op = x[0] | |
384 if op in ('string', 'symbol'): | |
385 return smallbonus, x # single revisions are small | |
386 elif op == 'and': | |
387 wa, ta = _optimize(x[1], True) | |
388 wb, tb = _optimize(x[2], True) | |
389 order = x[3] | |
390 w = min(wa, wb) | |
391 | |
392 # (::x and not ::y)/(not ::y and ::x) have a fast path | |
393 tm = _matchonly(ta, tb) or _matchonly(tb, ta) | |
394 if tm: | |
395 return w, ('func', ('symbol', 'only'), tm, order) | |
396 | |
397 if tb is not None and tb[0] == 'not': | |
398 return wa, ('difference', ta, tb[1], order) | |
399 | |
400 if wa > wb: | |
401 return w, (op, tb, ta, order) | |
402 return w, (op, ta, tb, order) | |
403 elif op == 'or': | |
404 # fast path for machine-generated expression, that is likely to have | |
405 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' | |
406 order = x[2] | |
407 ws, ts, ss = [], [], [] | |
408 def flushss(): | |
409 if not ss: | |
410 return | |
411 if len(ss) == 1: | |
412 w, t = ss[0] | |
413 else: | |
414 s = '\0'.join(t[1] for w, t in ss) | |
415 y = ('func', ('symbol', '_list'), ('string', s), order) | |
416 w, t = _optimize(y, False) | |
417 ws.append(w) | |
418 ts.append(t) | |
419 del ss[:] | |
420 for y in getlist(x[1]): | |
421 w, t = _optimize(y, False) | |
422 if t is not None and (t[0] == 'string' or t[0] == 'symbol'): | |
423 ss.append((w, t)) | |
424 continue | |
425 flushss() | |
426 ws.append(w) | |
427 ts.append(t) | |
428 flushss() | |
429 if len(ts) == 1: | |
430 return ws[0], ts[0] # 'or' operation is fully optimized out | |
431 # we can't reorder trees by weight because it would change the order. | |
432 # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a") | |
433 # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0])) | |
434 return max(ws), (op, ('list',) + tuple(ts), order) | |
435 elif op == 'not': | |
436 # Optimize not public() to _notpublic() because we have a fast version | |
437 if x[1][:3] == ('func', ('symbol', 'public'), None): | |
438 order = x[1][3] | |
439 newsym = ('func', ('symbol', '_notpublic'), None, order) | |
440 o = _optimize(newsym, not small) | |
441 return o[0], o[1] | |
442 else: | |
443 o = _optimize(x[1], not small) | |
444 order = x[2] | |
445 return o[0], (op, o[1], order) | |
446 elif op == 'rangeall': | |
447 return smallbonus, x | |
448 elif op in ('rangepre', 'rangepost', 'parentpost'): | |
449 o = _optimize(x[1], small) | |
450 order = x[2] | |
451 return o[0], (op, o[1], order) | |
452 elif op in ('dagrange', 'range', 'parent', 'ancestor'): | |
453 wa, ta = _optimize(x[1], small) | |
454 wb, tb = _optimize(x[2], small) | |
455 order = x[3] | |
456 return wa + wb, (op, ta, tb, order) | |
457 elif op == 'list': | |
458 ws, ts = zip(*(_optimize(y, small) for y in x[1:])) | |
459 return sum(ws), (op,) + ts | |
460 elif op == 'keyvalue': | |
461 w, t = _optimize(x[2], small) | |
462 return w, (op, x[1], t) | |
463 elif op == 'func': | |
464 f = getsymbol(x[1]) | |
465 wa, ta = _optimize(x[2], small) | |
466 if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep', | |
467 'keyword', 'outgoing', 'user', 'destination'): | |
468 w = 10 # slow | |
469 elif f in ('modifies', 'adds', 'removes'): | |
470 w = 30 # slower | |
471 elif f == "contains": | |
472 w = 100 # very slow | |
473 elif f == "ancestor": | |
474 w = 1 * smallbonus | |
475 elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'): | |
476 w = 0 | |
477 elif f == "sort": | |
478 w = 10 # assume most sorts look at changelog | |
479 else: | |
480 w = 1 | |
481 order = x[3] | |
482 return w + wa, (op, x[1], ta, order) | |
483 raise ValueError('invalid operator %r' % op) | |
484 | |
485 def optimize(tree): | |
486 """Optimize evaluatable tree | |
487 | |
488 All pseudo operations should be transformed beforehand. | |
489 """ | |
490 _weight, newtree = _optimize(tree, small=True) | |
491 return newtree | |
492 | |
493 # the set of valid characters for the initial letter of symbols in | |
494 # alias declarations and definitions | |
495 _aliassyminitletters = _syminitletters | set(pycompat.sysstr('$')) | |
496 | |
497 def _parsewith(spec, lookup=None, syminitletters=None): | |
498 """Generate a parse tree of given spec with given tokenizing options | |
499 | |
500 >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters) | |
501 ('func', ('symbol', 'foo'), ('symbol', '$1')) | |
502 >>> _parsewith('$1') | |
503 Traceback (most recent call last): | |
504 ... | |
505 ParseError: ("syntax error in revset '$1'", 0) | |
506 >>> _parsewith('foo bar') | |
507 Traceback (most recent call last): | |
508 ... | |
509 ParseError: ('invalid token', 4) | |
510 """ | |
511 p = parser.parser(elements) | |
512 tree, pos = p.parse(tokenize(spec, lookup=lookup, | |
513 syminitletters=syminitletters)) | |
514 if pos != len(spec): | |
515 raise error.ParseError(_('invalid token'), pos) | |
516 return _fixops(parser.simplifyinfixops(tree, ('list', 'or'))) | |
517 | |
518 class _aliasrules(parser.basealiasrules): | |
519 """Parsing and expansion rule set of revset aliases""" | |
520 _section = _('revset alias') | |
521 | |
522 @staticmethod | |
523 def _parse(spec): | |
524 """Parse alias declaration/definition ``spec`` | |
525 | |
526 This allows symbol names to use also ``$`` as an initial letter | |
527 (for backward compatibility), and callers of this function should | |
528 examine whether ``$`` is used also for unexpected symbols or not. | |
529 """ | |
530 return _parsewith(spec, syminitletters=_aliassyminitletters) | |
531 | |
532 @staticmethod | |
533 def _trygetfunc(tree): | |
534 if tree[0] == 'func' and tree[1][0] == 'symbol': | |
535 return tree[1][1], getlist(tree[2]) | |
536 | |
537 def expandaliases(ui, tree): | |
538 aliases = _aliasrules.buildmap(ui.configitems('revsetalias')) | |
539 tree = _aliasrules.expand(aliases, tree) | |
540 # warn about problematic (but not referred) aliases | |
541 for name, alias in sorted(aliases.iteritems()): | |
542 if alias.error and not alias.warned: | |
543 ui.warn(_('warning: %s\n') % (alias.error)) | |
544 alias.warned = True | |
545 return tree | |
546 | |
547 def foldconcat(tree): | |
548 """Fold elements to be concatenated by `##` | |
549 """ | |
550 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'): | |
551 return tree | |
552 if tree[0] == '_concat': | |
553 pending = [tree] | |
554 l = [] | |
555 while pending: | |
556 e = pending.pop() | |
557 if e[0] == '_concat': | |
558 pending.extend(reversed(e[1:])) | |
559 elif e[0] in ('string', 'symbol'): | |
560 l.append(e[1]) | |
561 else: | |
562 msg = _("\"##\" can't concatenate \"%s\" element") % (e[0]) | |
563 raise error.ParseError(msg) | |
564 return ('string', ''.join(l)) | |
565 else: | |
566 return tuple(foldconcat(t) for t in tree) | |
567 | |
568 def parse(spec, lookup=None): | |
569 return _parsewith(spec, lookup=lookup) | |
570 | |
571 def formatspec(expr, *args): | |
572 ''' | |
573 This is a convenience function for using revsets internally, and | |
574 escapes arguments appropriately. Aliases are intentionally ignored | |
575 so that intended expression behavior isn't accidentally subverted. | |
576 | |
577 Supported arguments: | |
578 | |
579 %r = revset expression, parenthesized | |
580 %d = int(arg), no quoting | |
581 %s = string(arg), escaped and single-quoted | |
582 %b = arg.branch(), escaped and single-quoted | |
583 %n = hex(arg), single-quoted | |
584 %% = a literal '%' | |
585 | |
586 Prefixing the type with 'l' specifies a parenthesized list of that type. | |
587 | |
588 >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()")) | |
589 '(10 or 11):: and ((this()) or (that()))' | |
590 >>> formatspec('%d:: and not %d::', 10, 20) | |
591 '10:: and not 20::' | |
592 >>> formatspec('%ld or %ld', [], [1]) | |
593 "_list('') or 1" | |
594 >>> formatspec('keyword(%s)', 'foo\\xe9') | |
595 "keyword('foo\\\\xe9')" | |
596 >>> b = lambda: 'default' | |
597 >>> b.branch = b | |
598 >>> formatspec('branch(%b)', b) | |
599 "branch('default')" | |
600 >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd']) | |
601 "root(_list('a\\x00b\\x00c\\x00d'))" | |
602 ''' | |
603 | |
604 def quote(s): | |
605 return repr(str(s)) | |
606 | |
607 def argtype(c, arg): | |
608 if c == 'd': | |
609 return str(int(arg)) | |
610 elif c == 's': | |
611 return quote(arg) | |
612 elif c == 'r': | |
613 parse(arg) # make sure syntax errors are confined | |
614 return '(%s)' % arg | |
615 elif c == 'n': | |
616 return quote(node.hex(arg)) | |
617 elif c == 'b': | |
618 return quote(arg.branch()) | |
619 | |
620 def listexp(s, t): | |
621 l = len(s) | |
622 if l == 0: | |
623 return "_list('')" | |
624 elif l == 1: | |
625 return argtype(t, s[0]) | |
626 elif t == 'd': | |
627 return "_intlist('%s')" % "\0".join(str(int(a)) for a in s) | |
628 elif t == 's': | |
629 return "_list('%s')" % "\0".join(s) | |
630 elif t == 'n': | |
631 return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s) | |
632 elif t == 'b': | |
633 return "_list('%s')" % "\0".join(a.branch() for a in s) | |
634 | |
635 m = l // 2 | |
636 return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t)) | |
637 | |
638 ret = '' | |
639 pos = 0 | |
640 arg = 0 | |
641 while pos < len(expr): | |
642 c = expr[pos] | |
643 if c == '%': | |
644 pos += 1 | |
645 d = expr[pos] | |
646 if d == '%': | |
647 ret += d | |
648 elif d in 'dsnbr': | |
649 ret += argtype(d, args[arg]) | |
650 arg += 1 | |
651 elif d == 'l': | |
652 # a list of some type | |
653 pos += 1 | |
654 d = expr[pos] | |
655 ret += listexp(list(args[arg]), d) | |
656 arg += 1 | |
657 else: | |
658 raise error.Abort(_('unexpected revspec format character %s') | |
659 % d) | |
660 else: | |
661 ret += c | |
662 pos += 1 | |
663 | |
664 return ret | |
665 | |
666 def prettyformat(tree): | |
667 return parser.prettyformat(tree, ('string', 'symbol')) | |
668 | |
669 def depth(tree): | |
670 if isinstance(tree, tuple): | |
671 return max(map(depth, tree)) + 1 | |
672 else: | |
673 return 0 | |
674 | |
675 def funcsused(tree): | |
676 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'): | |
677 return set() | |
678 else: | |
679 funcs = set() | |
680 for s in tree[1:]: | |
681 funcs |= funcsused(s) | |
682 if tree[0] == 'func': | |
683 funcs.add(tree[1][1]) | |
684 return funcs |