Mercurial > hg-stable
diff mercurial/fileset.py @ 14511:30506b894359
filesets: introduce basic fileset expression parser
author | Matt Mackall <mpm@selenic.com> |
---|---|
date | Wed, 01 Jun 2011 19:12:18 -0500 |
parents | mercurial/revset.py@4f695345979c |
children | 85fe676c27e9 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/fileset.py Wed Jun 01 19:12:18 2011 -0500 @@ -0,0 +1,79 @@ +# fileset.py - file set queries for mercurial +# +# Copyright 2010 Matt Mackall <mpm@selenic.com> +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +import parser, error +from i18n import _ + +elements = { + "(": (20, ("group", 1, ")"), ("func", 1, ")")), + "-": (5, ("negate", 19), ("minus", 5)), + "not": (10, ("not", 10)), + "!": (10, ("not", 10)), + "and": (5, None, ("and", 5)), + "&": (5, None, ("and", 5)), + "or": (4, None, ("or", 4)), + "|": (4, None, ("or", 4)), + "+": (4, None, ("or", 4)), + ",": (2, None, ("list", 2)), + ")": (0, None, None), + "symbol": (0, ("symbol",), None), + "string": (0, ("string",), None), + "end": (0, None, None), +} + +keywords = set(['and', 'or', 'not']) + +def tokenize(program): + pos, l = 0, len(program) + while pos < l: + c = program[pos] + if c.isspace(): # skip inter-token whitespace + pass + elif c in "(),-|&+!": # handle simple operators + yield (c, None, pos) + elif (c in '"\'' or c == 'r' and + program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings + if c == 'r': + pos += 1 + c = program[pos] + decode = lambda x: x + else: + decode = lambda x: x.decode('string-escape') + pos += 1 + s = pos + while pos < l: # find closing quote + d = program[pos] + if d == '\\': # skip over escaped characters + pos += 2 + continue + if d == c: + yield ('string', decode(program[s:pos]), s) + break + pos += 1 + else: + raise error.ParseError(_("unterminated string"), s) + elif c.isalnum() or c in '.*{}[]?' or ord(c) > 127: # gather up a symbol/keyword + s = pos + pos += 1 + while pos < l: # find end of symbol + d = program[pos] + if not (d.isalnum() or d in ".*{}[]?," or ord(d) > 127): + break + pos += 1 + sym = program[s:pos] + if sym in keywords: # operator keywords + yield (sym, None, s) + else: + yield ('symbol', sym, s) + pos -= 1 + else: + raise error.ParseError(_("syntax error"), pos) + pos += 1 + yield ('end', None, pos) + +parse = parser.parser(tokenize, elements).parse +