author | Mads Kiilerich <mads@kiilerich.com> |
Thu, 08 Dec 2011 16:28:18 +0100 | |
changeset 15897 | cc021114fc98 |
parent 14701 | 4b93bd041772 |
child 17500 | 8ac8db8dc346 |
permissions | -rw-r--r-- |
11274 | 1 |
# parser.py - simple top-down operator precedence parser for mercurial |
2 |
# |
|
3 |
# Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
4 |
# |
|
5 |
# This software may be used and distributed according to the terms of the |
|
6 |
# GNU General Public License version 2 or any later version. |
|
7 |
||
11449
05af334bac05
parser: fix URL to effbot
Julian Cowley <julian@lava.net>
parents:
11412
diff
changeset
|
8 |
# see http://effbot.org/zone/simple-top-down-parsing.htm and |
11274 | 9 |
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
10 |
# for background |
|
11 |
||
12 |
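# takes a tokenizer and elements
# tokenizer is a callable that takes the program and returns an iterator
# of (type, value, position) tuples
# elements is a mapping of types to binding strength, prefix and infix
# actions, and an optional suffix action
# an action is a tree node name, a binding strength used to parse its
# operand, and an optional token type that must follow the operand
# (a one-element action simply wraps the token's value)
# __call__(program) parses program into a labelled tree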
|
17 |
||
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
18 |
import error |
14701
4b93bd041772
parsers: fix localization markup of parser errors
Mads Kiilerich <mads@kiilerich.com>
parents:
13665
diff
changeset
|
19 |
from i18n import _ |
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
20 |
|
11274 | 21 |
class parser(object):
    """Simple top-down operator precedence (Pratt) parser.

    tokenizer is a callable taking the program text and returning an
    iterator of (type, value, pos) tuples.

    elements maps each token type to a tuple of
    (binding strength, prefix action, infix action[, suffix action]).
    An action is (node name,), (node name, binding strength) or
    (node name, binding strength, closing token type); a false value
    means the token cannot be used in that position.

    methods, if given, maps node names to callables used by eval().
    """

    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
        # one-token lookahead; filled in by parse() before parsing starts
        self.current = None

    def _advance(self):
        'advance the tokenizer, returning the token that was current'
        t = self.current
        try:
            # builtin next() works with both Python 2 (2.6+) and
            # Python 3 iterators, unlike the .next() method
            self.current = next(self._iter)
        except StopIteration:
            # deliberately keep the last token as the lookahead once the
            # tokenizer is exhausted
            pass
        return t

    def _match(self, m, pos):
        """make sure the tokenizer matches an end condition

        Raises error.ParseError if the current token is not of type m,
        otherwise consumes it. pos is accepted for the callers'
        convenience but is not used; the error reports the position of
        the offending token.
        """
        if self.current[0] != m:
            raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                   self.current[2])
        self._advance()

    def _parse(self, bind=0):
        """parse one expression and return it as a nested tuple

        Parsing continues for as long as the upcoming token binds more
        tightly than bind. Raises error.ParseError when a token is used
        in a position its element entry does not allow.
        """
        token, value, pos = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise error.ParseError(_("not a prefix: %s") % token, pos)
        if len(prefix) == 1:
            # plain value token: wrap the value in its node name
            expr = (prefix[0], value)
        elif len(prefix) > 2 and prefix[2] == self.current[0]:
            # closing token follows immediately: empty operand
            self._match(prefix[2], pos)
            expr = (prefix[0], None)
        else:
            expr = (prefix[0], self._parse(prefix[1]))
            if len(prefix) > 2:
                self._match(prefix[2], pos)
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
            else:
                # handle infix rules
                if len(e) < 3 or not e[2]:
                    raise error.ParseError(_("not an infix: %s") % token, pos)
                infix = e[2]
                if len(infix) == 3 and infix[2] == self.current[0]:
                    # closing token follows immediately: empty right operand
                    self._match(infix[2], pos)
                    expr = (infix[0], expr, None)
                else:
                    expr = (infix[0], expr, self._parse(infix[1]))
                    if len(infix) == 3:
                        self._match(infix[2], pos)
        return expr

    def parse(self, message):
        """generate a parse tree from a message

        Returns (tree, pos) where pos is the position of the token at
        which parsing stopped, so that the caller can detect input that
        was not parsed completely.
        """
        self._iter = self._tokenizer(message)
        # prime the lookahead with the first token
        self._advance()
        res = self._parse()
        token, value, pos = self.current
        return res, pos

    def eval(self, tree):
        'recursively evaluate a parse tree using node methods'
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])

    def __call__(self, message):
        """parse a message into a parse tree and evaluate if methods given

        Without methods the (tree, pos) pair from parse() is returned
        unchanged. With methods only the evaluated result is returned;
        the stop position is discarded.
        """
        tree, pos = self.parse(message)
        if self._methods:
            # evaluate the tree itself, not the (tree, pos) pair: eval()
            # would otherwise treat the tree as a node name
            return self.eval(tree)
        return tree, pos