author | Adrian Buehlmann <adrian@cadifra.com> |
Fri, 03 Jun 2011 11:26:43 +0200 | |
changeset 14524 | 97d2259af787 |
parent 14513 | 85fe676c27e9 |
child 14551 | 68d814a3cefd |
permissions | -rw-r--r-- |
14511
30506b894359
filesets: introduce basic fileset expression parser
Matt Mackall <mpm@selenic.com>
parents:
14509
diff
changeset
|
1 |
# fileset.py - file set queries for mercurial |
11275 | 2 |
# |
3 |
# Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
4 |
# |
|
5 |
# This software may be used and distributed according to the terms of the |
|
6 |
# GNU General Public License version 2 or any later version. |
|
7 |
||
14511
30506b894359
filesets: introduce basic fileset expression parser
Matt Mackall <mpm@selenic.com>
parents:
14509
diff
changeset
|
8 |
import parser, error |
13593
cc4721ed7a2a
help: extract items doc generation function
Patrick Mezard <pmezard@gmail.com>
parents:
13506
diff
changeset
|
9 |
from i18n import _ |
11275 | 10 |
|
11 |
# Operator table handed to parser.parser() together with tokenize().
# Keys are the token types that tokenize() yields.
# NOTE(review): each value presumably reads as
# (binding strength, prefix action, infix action), with None meaning the
# token cannot appear in that position -- confirm against parser.py.
# The "not" and "!" entries deliberately carry only two fields (there is
# no infix slot at all), exactly as in the original table.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (5, ("negate", 19), ("minus", 5)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}
|
27 |
||
28 |
# Words that tokenize() reports as operator tokens instead of plain symbols.
keywords = set(['and', 'or', 'not'])
|
29 |
||
30 |
def tokenize(program):
    """Split a fileset expression into a stream of tokens.

    This is a generator yielding ``(type, value, pos)`` tuples, where
    ``pos`` is an offset into ``program``:

    - each of the characters ``(),-|&+!`` yields ``(char, None, pos)``;
    - a quoted string yields ``('string', text, pos)`` where ``pos`` is
      the offset of the first character after the opening quote.  Plain
      ``'...'``/``"..."`` strings are passed through the ``string-escape``
      codec; strings prefixed with ``r`` are returned verbatim;
    - a word listed in ``keywords`` yields ``(word, None, pos)``;
    - any other run of symbol characters yields ``('symbol', text, pos)``.
      A symbol starts with an alphanumeric character, one of ``.*{}[]?``
      or a character whose ordinal is above 127; following characters may
      additionally be ``,``;
    - after the input is consumed a final ``('end', None, pos)`` is
      yielded.

    Whitespace between tokens is skipped.

    Raises error.ParseError for a string with no closing quote and for
    any character that cannot start a token.
    """
    pos, l = 0, len(program)
    while pos < l:
        c = program[pos]
        if c.isspace(): # skip inter-token whitespace
            pass
        elif c in "(),-|&+!": # handle simple operators
            yield (c, None, pos)
        elif (c in '"\'' or c == 'r' and
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
            if c == 'r':
                # raw string: step over the prefix so that c becomes the
                # quote character, and leave the body undecoded
                pos += 1
                c = program[pos]
                decode = lambda x: x
            else:
                decode = lambda x: x.decode('string-escape')
            pos += 1
            s = pos
            while pos < l: # find closing quote
                d = program[pos]
                if d == '\\': # skip over escaped characters
                    pos += 2
                    continue
                if d == c:
                    yield ('string', decode(program[s:pos]), s)
                    break
                pos += 1
            else:
                # the scan ran off the end without meeting the quote
                raise error.ParseError(_("unterminated string"), s)
        elif c.isalnum() or c in '.*{}[]?' or ord(c) > 127:
            # gather up a symbol/keyword
            s = pos
            pos += 1
            while pos < l: # find end of symbol
                d = program[pos]
                if not (d.isalnum() or d in ".*{}[]?," or ord(d) > 127):
                    break
                pos += 1
            sym = program[s:pos]
            if sym in keywords: # operator keywords
                yield (sym, None, s)
            else:
                yield ('symbol', sym, s)
            # pos points one past the symbol; step back so the shared
            # increment below lands on the next unread character
            pos -= 1
        else:
            raise error.ParseError(_("syntax error"), pos)
        pos += 1
    yield ('end', None, pos)
11275 | 78 |
|
79 |
# Module-level entry point: the bound parse method of a parser.parser
# instance built from this module's tokenizer and operator table.
parse = parser.parser(tokenize, elements).parse
|
80 |