Mercurial: comparison mercurial/revsetlang.py

equal deleted inserted replaced

-:57875cf423c9
+:2372284d9457
 parser,
 pycompat,
 smartset,
 util,
 )
-from .utils import (
+from .utils import stringutil
-stringutil,
-)
 elements = {
 # token-type: binding-strength, primary, prefix, infix, suffix
 "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
 "[": (21, None, None, ("subscript", 1, "]"), None),
 "#": (21, None, None, ("relation", 21), None),
 "##": (20, None, None, ("_concat", 20), None),
 "~": (18, None, None, ("ancestor", 18), None),
 "^": (18, None, None, ("parent", 18), "parentpost"),
 "-": (5, None, ("negate", 19), ("minus", 5), None),
-"::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
+"::": (
-"dagrangepost"),
+17,
-"..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
+"dagrangeall",
-"dagrangepost"),
+("dagrangepre", 17),
+("dagrange", 17),
+"dagrangepost",
+),
+"..": (
+17,
+"dagrangeall",
+("dagrangepre", 17),
+("dagrange", 17),
+"dagrangepost",
+),
 ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
 "not": (10, None, ("not", 10), None, None),
 "!": (10, None, ("not", 10), None, None),
 "and": (5, None, None, ("and", 5), None),
 "&": (5, None, None, ("and", 5), None),
 _quoteletters = {'"', "'"}
 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
 # default set of valid characters for the initial letter of symbols
-_syminitletters = set(pycompat.iterbytestr(
+_syminitletters = set(
-pycompat.sysbytes(string.ascii_letters) +
+pycompat.iterbytestr(
-pycompat.sysbytes(string.digits) +
+pycompat.sysbytes(string.ascii_letters)
-'._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
++ pycompat.sysbytes(string.digits)
++ '._@'
+)
+) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
 # default set of valid characters for non-initial letters of symbols
 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
 '''
 Parse a revset statement into a stream of tokens
 >>> list(tokenize(b"@::"))
 [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
 '''
 if not isinstance(program, bytes):
-raise error.ProgrammingError('revset statement must be bytes, got %r'
+raise error.ProgrammingError(
-% program)
+'revset statement must be bytes, got %r' % program
+)
 program = pycompat.bytestr(program)
 if syminitletters is None:
 syminitletters = _syminitletters
 if symletters is None:
 symletters = _symletters
 return
 pos, l = 0, len(program)
 while pos < l:
 c = program[pos]
-if c.isspace(): # skip inter-token whitespace
+if c.isspace():  # skip inter-token whitespace
 pass
-elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
+elif (
+c == ':' and program[pos : pos + 2] == '::'
+):  # look ahead carefully
 yield ('::', None, pos)
-pos += 1 # skip ahead
+pos += 1  # skip ahead
-elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
+elif (
+c == '.' and program[pos : pos + 2] == '..'
+):  # look ahead carefully
 yield ('..', None, pos)
-pos += 1 # skip ahead
+pos += 1  # skip ahead
-elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
+elif (
+c == '#' and program[pos : pos + 2] == '##'
+):  # look ahead carefully
 yield ('##', None, pos)
-pos += 1 # skip ahead
+pos += 1  # skip ahead
-elif c in _simpleopletters: # handle simple operators
+elif c in _simpleopletters:  # handle simple operators
 yield (c, None, pos)
-elif (c in _quoteletters or c == 'r' and
+elif (
-program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
+c in _quoteletters
+or c == 'r'
+and program[pos : pos + 2] in ("r'", 'r"')
+):  # handle quoted strings
 if c == 'r':
 pos += 1
 c = program[pos]
 decode = lambda x: x
 else:
 decode = parser.unescapestr
 pos += 1
 s = pos
-while pos < l: # find closing quote
+while pos < l:  # find closing quote
 d = program[pos]
-if d == '\\': # skip over escaped characters
+if d == '\\':  # skip over escaped characters
 pos += 2
 continue
 if d == c:
 yield ('string', decode(program[s:pos]), s)
 break
 raise error.ParseError(_("unterminated string"), s)
 # gather up a symbol/keyword
 elif c in syminitletters:
 s = pos
 pos += 1
-while pos < l: # find end of symbol
+while pos < l:  # find end of symbol
 d = program[pos]
 if d not in symletters:
 break
-if d == '.' and program[pos - 1] == '.': # special case for ..
+if d == '.' and program[pos - 1] == '.':  # special case for ..
 pos -= 1
 break
 pos += 1
 sym = program[s:pos]
-if sym in keywords: # operator keywords
+if sym in keywords:  # operator keywords
 yield (sym, None, s)
 elif '-' in sym:
 # some jerk gave us foo-bar-baz, try to check if it's a symbol
 if lookup and lookup(sym):
 # looks like a real symbol
 yield ('symbol', sym, s)
 else:
 # looks like an expression
 parts = sym.split('-')
 for p in parts[:-1]:
-if p: # possible consecutive -
+if p:  # possible consecutive -
 yield ('symbol', p, s)
 s += len(p)
 yield ('-', None, s)
 s += 1
-if parts[-1]: # possible trailing -
+if parts[-1]:  # possible trailing -
 yield ('symbol', parts[-1], s)
 else:
 yield ('symbol', sym, s)
 pos -= 1
 else:
-raise error.ParseError(_("syntax error in revset '%s'") %
+raise error.ParseError(
-program, pos)
+_("syntax error in revset '%s'") % program, pos
+)
 pos += 1
 yield ('end', None, pos)
 # helpers
 # unique sentinel meaning "no default supplied"; getinteger() compares
 # against it by identity
 _notset = object()
 def getsymbol(x):
     """Return the name stored in a ``('symbol', name)`` node

     ParseError is raised when ``x`` is empty or its tag is not 'symbol'.
     """
     if not x or x[0] != 'symbol':
         raise error.ParseError(_('not a symbol'))
     return x[1]
 def getstring(x, err):
     """Return the payload of a 'string' or 'symbol' node

     Any other input (including an empty one) raises ParseError(err).
     """
     if not x or x[0] not in ('string', 'symbol'):
         raise error.ParseError(err)
     return x[1]
 def getinteger(x, err, default=_notset):
     """Convert a 'string' or 'symbol' node to an int

     When ``x`` is empty and a ``default`` was supplied, the default is
     returned as is. A value that int() rejects raises ParseError(err).
     """
     if default is not _notset and not x:
         return default
     try:
         number = int(getstring(x, err))
     except ValueError:
         raise error.ParseError(err)
     return number
 def getboolean(x, err):
     """Interpret a symbol node as a boolean using stringutil.parsebool()

     ParseError(err) is raised when parsebool() returns None; a non-symbol
     node is rejected by getsymbol() first.
     """
     parsed = stringutil.parsebool(getsymbol(x))
     if parsed is None:
         raise error.ParseError(err)
     return parsed
 def getlist(x):
     """Return the elements of ``x`` as a new Python list

     An empty ``x`` gives [], a ``('list', ...)`` node gives its children,
     and any other node is wrapped in a one-element list.
     """
     if x and x[0] == 'list':
         return list(x[1:])
     return [x] if x else []
 def getrange(x, err):
 if not x:
 raise error.ParseError(err)
 op = x[0]
 return x[1], None
 elif op == 'rangeall':
 return None, None
 raise error.ParseError(err)
 def getintrange(x, err1, err2, deffirst=_notset, deflast=_notset):
 """Get [first, last] integer range (both inclusive) from a parsed tree
 If any of the sides omitted, and if no default provided, ParseError will
 be raised.
 n = getinteger(x, err1)
 return n, n
 a, b = getrange(x, err1)
 return getinteger(a, err2, deffirst), getinteger(b, err2, deflast)
 def getargs(x, min, max, err):
     """Return the argument list of ``x`` after checking its length

     ParseError(err) is raised when there are fewer than ``min`` arguments,
     or more than ``max`` when ``max`` is non-negative (a negative ``max``
     disables the upper bound).
     """
     args = getlist(x)
     count = len(args)
     if count < min:
         raise error.ParseError(err)
     if max >= 0 and count > max:
         raise error.ParseError(err)
     return args
 def getargsdict(x, funcname, keys):
-return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
+return parser.buildargsdict(
-keyvaluenode='keyvalue', keynode='symbol')
+getlist(x),
+funcname,
+parser.splitargspec(keys),
+keyvaluenode='keyvalue',
+keynode='symbol',
+)
 # cache of {spec: raw parsed tree} built internally
 _treecache = {}
 def _cachedtree(spec):
     """Return the parse() result for ``spec``, memoized in _treecache"""
     cached = _treecache.get(spec)
     if cached is not None:
         return cached
     # thread safe because parse() is reentrant and dict.__setitem__() is
     # atomic
     parsed = parse(spec)
     _treecache[spec] = parsed
     return parsed
 def _build(tmplspec, *repls):
     """Build a raw parsed tree from a template revset statement

     The cached tree of ``tmplspec`` is handed to parser.buildtree() with
     ``('symbol', '_')`` as the placeholder and ``repls`` as replacements.

     >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
     ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
     """
     placeholder = ('symbol', '_')
     return parser.buildtree(_cachedtree(tmplspec), placeholder, *repls)
 def _match(patspec, tree):
 """Test if a tree matches the given pattern statement; return the matches
 >>> _match(b'f(_)', parse(b'f()'))
 >>> _match(b'f(_)', parse(b'f(1)'))
 [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
 >>> _match(b'f(_)', parse(b'f(1, 2)'))
 """
 pattern = _cachedtree(patspec)
-return parser.matchtree(pattern, tree, ('symbol', '_'),
+return parser.matchtree(
-{'keyvalue', 'list'})
+pattern, tree, ('symbol', '_'), {'keyvalue', 'list'}
+)
 def _matchonly(revs, bases):
     """Match ``('and', revs, bases)`` against the 'only' pattern

     Returns whatever _match() returns for the pattern
     'ancestors(_) and not ancestors(_)'.
     """
     pattern = 'ancestors(_) and not ancestors(_)'
     candidate = ('and', revs, bases)
     return _match(pattern, candidate)
 def _fixops(x):
 """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
 handled well by our simple top-down parser"""
 if not isinstance(x, tuple):
 elif op == 'subscript' and x[1][0] == 'relation':
 # x#y[z] ternary
 return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
 return (op,) + tuple(_fixops(y) for y in x[1:])
 def _analyze(x):
 if x is None:
 return x
 return (op, None)
 elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
 return (op, _analyze(x[1]))
 elif op == 'group':
 return _analyze(x[1])
-elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
+elif op in {
-'subscript'}:
+'and',
+'dagrange',
+'range',
+'parent',
+'ancestor',
+'relation',
+'subscript',
+}:
 ta = _analyze(x[1])
 tb = _analyze(x[2])
 return (op, ta, tb)
 elif op == 'relsubscript':
 ta = _analyze(x[1])
 return (op, x[1], _analyze(x[2]))
 elif op == 'func':
 return (op, x[1], _analyze(x[2]))
 raise ValueError('invalid operator %r' % op)
 def analyze(x):
     """Turn a raw parsed tree into an evaluatable tree

     The result can be fed to optimize() or getset(). All pseudo operations
     should be mapped to real operations or functions defined in methods or
     symbols table respectively.
     """
     evaluatable = _analyze(x)
     return evaluatable
 def _optimize(x):
 if x is None:
 return 0, x
 op = x[0]
 if op in ('string', 'symbol', 'smartset'):
-return 0.5, x # single revisions are small
+return 0.5, x  # single revisions are small
 elif op == 'and':
 wa, ta = _optimize(x[1])
 wb, tb = _optimize(x[2])
 w = min(wa, wb)
 return w, (op, ta, tb)
 elif op == 'or':
 # fast path for machine-generated expression, that is likely to have
 # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
 ws, ts, ss = [], [], []
 def flushss():
 if not ss:
 return
 if len(ss) == 1:
 w, t = ss[0]
 y = _build('_list(_)', ('string', s))
 w, t = _optimize(y)
 ws.append(w)
 ts.append(t)
 del ss[:]
 for y in getlist(x[1]):
 w, t = _optimize(y)
 if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
 ss.append((w, t))
 continue
 flushss()
 ws.append(w)
 ts.append(t)
 flushss()
 if len(ts) == 1:
-return ws[0], ts[0] # 'or' operation is fully optimized out
+return ws[0], ts[0]  # 'or' operation is fully optimized out
 return max(ws), (op, ('list',) + tuple(ts))
 elif op == 'not':
 # Optimize not public() to _notpublic() because we have a fast version
 if _match('public()', x[1]):
 o = _optimize(_build('_notpublic()'))
 return w + wa, _build('_commonancestorheads(_)', m[1])
 return w + wa, (op, x[1], ta)
 raise ValueError('invalid operator %r' % op)
 def optimize(tree):
     """Return the optimized form of an evaluatable tree

     All pseudo operations should be transformed beforehand. _optimize()
     yields a (weight, tree) pair; only the tree is returned.
     """
     return _optimize(tree)[1]
 # the set of valid characters for the initial letter of symbols in
 # alias declarations and definitions
 _aliassyminitletters = _syminitletters | {'$'}
 def _parsewith(spec, lookup=None, syminitletters=None):
 """Generate a parse tree of given spec with given tokenizing options
 >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
 ParseError: ('invalid token', 4)
 """
 if lookup and spec.startswith('revset(') and spec.endswith(')'):
 lookup = None
 p = parser.parser(elements)
-tree, pos = p.parse(tokenize(spec, lookup=lookup,
+tree, pos = p.parse(
-syminitletters=syminitletters))
+tokenize(spec, lookup=lookup, syminitletters=syminitletters)
+)
 if pos != len(spec):
 raise error.ParseError(_('invalid token'), pos)
 return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
 class _aliasrules(parser.basealiasrules):
 """Parsing and expansion rule set of revset aliases"""
 _section = _('revset alias')
 @staticmethod
 def _parse(spec):
 """Parse alias declaration/definition ``spec``
 @staticmethod
 def _trygetfunc(tree):
 if tree[0] == 'func' and tree[1][0] == 'symbol':
 return tree[1][1], getlist(tree[2])
 def expandaliases(tree, aliases, warn=None):
 """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
 aliases = _aliasrules.buildmap(aliases)
 tree = _aliasrules.expand(aliases, tree)
 if alias.error and not alias.warned:
 warn(_('warning: %s\n') % (alias.error))
 alias.warned = True
 return tree
 def foldconcat(tree):
 """Fold elements to be concatenated by `##`
 """
-if (not isinstance(tree, tuple)
+if not isinstance(tree, tuple) or tree[0] in (
-or tree[0] in ('string', 'symbol', 'smartset')):
+'string',
+'symbol',
+'smartset',
+):
 return tree
 if tree[0] == '_concat':
 pending = [tree]
 l = []
 while pending:
 raise error.ParseError(msg)
 return ('string', ''.join(l))
 else:
 return tuple(foldconcat(t) for t in tree)
 def parse(spec, lookup=None):
 try:
 return _parsewith(spec, lookup=lookup)
 except error.ParseError as inst:
 if len(inst.args) > 1:  # has location
 # start. Therefore, we print "loc + 1" spaces (instead of "loc")
 # to line up the caret with the location of the error.
 inst.hint = spec + '\n' + ' ' * (loc + 1) + '^ ' + _('here')
 raise
 def _quote(s):
     r"""Wrap a value in single quotes so it is safe for the revset engine

     The value is converted with pycompat.bytestr() and escaped with
     stringutil.escapestr() before quoting.

     >>> _quote(b'asdf')
     "'asdf'"
     >>> _quote(1)
     "'1'"
     """
     escaped = stringutil.escapestr(pycompat.bytestr(s))
     return "'%s'" % escaped
 def _formatargtype(c, arg):
 if c == 'd':
 return '_rev(%d)' % int(arg)
 elif c == 's':
 return _quote(arg)
 elif c == 'r':
 if not isinstance(arg, bytes):
 raise TypeError
-parse(arg) # make sure syntax errors are confined
+parse(arg)  # make sure syntax errors are confined
 return '(%s)' % arg
 elif c == 'n':
 return _quote(node.hex(arg))
 elif c == 'b':
 try:
 return _quote(arg.branch())
 except AttributeError:
 raise TypeError
 raise error.ParseError(_('unexpected revspec format character %s') % c)
 def _formatlistexp(s, t):
 l = len(s)
 if l == 0:
 return "_list('')"
 raise TypeError
 m = l // 2
 return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
 def _formatintlist(data):
 try:
 l = len(data)
 if l == 0:
 return "_list('')"
 return _formatargtype('d', data[0])
 return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
 except (TypeError, ValueError):
 raise error.ParseError(_('invalid argument for revspec'))
 def _formatparamexp(args, t):
     """Format every item of ``args`` as type ``t``, joined with ', '"""
     pieces = [_formatargtype(t, item) for item in args]
     return ', '.join(pieces)
 # dispatch table looked up by _parseargs(): maps a format character to the
 # helper that renders a sequence argument ('l' -> _formatlistexp,
 # 'p' -> _formatparamexp)
 _formatlistfuncs = {
 'l': _formatlistexp,
 'p': _formatparamexp,
 }
 def formatspec(expr, *args):
 '''
 This is a convenience function for using revsets internally, and
 escapes arguments appropriately. Aliases are intentionally ignored
 ret.append(_formatintlist(list(arg)))
 else:
 raise error.ProgrammingError("unknown revspec item type: %r" % t)
 return b''.join(ret)
 def spectree(expr, *args):
 """similar to formatspec but return a parsed and optimized tree"""
 parsed = _parseargs(expr, args)
 ret = []
 inputs = []
 tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
 tree = foldconcat(tree)
 tree = analyze(tree)
 tree = optimize(tree)
 return tree
 def _parseargs(expr, args):
 """parse the expression and replace all inexpensive args
 return a list of tuple [(arg-type, arg-value)]
 raise error.ParseError(_('missing argument for revspec'))
 f = _formatlistfuncs.get(d)
 if f:
 # a list of some type, might be expensive, do not replace
 pos += 1
-islist = (d == 'l')
+islist = d == 'l'
 try:
 d = expr[pos]
 except IndexError:
 raise error.ParseError(_('incomplete revspec format character'))
 if islist and d == 'd' and arg:
 raise error.ParseError(_('too many revspec arguments specified'))
 except StopIteration:
 pass
 return ret
 def prettyformat(tree):
     """Format ``tree`` with parser.prettyformat()

     'string' and 'symbol' are passed as the leaf node types.
     """
     leafnodes = ('string', 'symbol')
     return parser.prettyformat(tree, leafnodes)
 def depth(tree):
     """Return the nesting depth of ``tree``

     Anything that is not a tuple counts as depth 0; a tuple is one level
     deeper than its deepest element.
     """
     if not isinstance(tree, tuple):
         return 0
     return 1 + max(depth(child) for child in tree)
 def funcsused(tree):
 if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
 return set()
 else:
 funcs |= funcsused(s)
 if tree[0] == 'func':
 funcs.add(tree[1][1])
 return funcs
 # 1 to 40 hexadecimal digits anchored by `$`; _ishashlikesymbol() applies it
 # with match(), so the symbol has to consist of hex digits only
 _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
 def _ishashlikesymbol(symbol):
     """Tell whether ``symbol`` looks like a hash

     Returns the result of _hashre.match(): a truthy match object, or None
     when the symbol does not match.
     """
     found = _hashre.match(symbol)
     return found
 def gethashlikesymbols(tree):
 """returns the list of symbols of the tree that look like hashes
 >>> gethashlikesymbols(parse(b'3::abe3ff'))

changeset 43076	2372284d9457
parent 41835	ddb174511f1b
child 43077	687b865b95ad