author | Matt Mackall <mpm@selenic.com> |
Wed, 02 Jun 2010 14:07:46 -0500 | |
changeset 11278 | 7df88cdf47fd |
parent 11274 | 77272d28b53f |
child 11289 | 4215ce511134 |
permissions | -rw-r--r-- |
11274 | 1 |
# parser.py - simple top-down operator precedence parser for mercurial |
2 |
# |
|
3 |
# Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
4 |
# |
|
5 |
# This software may be used and distributed according to the terms of the |
|
6 |
# GNU General Public License version 2 or any later version. |
|
7 |
||
8 |
# see http://effbot.org/zone/simple-top-down-parsing.txt and |
|
9 |
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
|
10 |
# for background |
|
11 |
||
12 |
# takes a tokenizer and elements |
|
13 |
# tokenizer is an iterator that returns type, value pairs |
|
14 |
# elements is a mapping of types to binding strength, prefix and infix actions |
|
15 |
# an action is a tree node name, a tree label, and an optional match |
|
16 |
# __call__(program) parses program into a labelled tree |
|
17 |
||
18 |
class parser(object): |
|
19 |
def __init__(self, tokenizer, elements, methods=None): |
|
20 |
self._tokenizer = tokenizer |
|
21 |
self._elements = elements |
|
22 |
self._methods = methods |
|
23 |
def _advance(self): |
|
24 |
'advance the tokenizer' |
|
25 |
t = self.current |
|
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
26 |
try: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
27 |
self.current = self._iter.next() |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
28 |
except StopIteration: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
29 |
pass |
11274 | 30 |
return t |
31 |
def _match(self, m): |
|
32 |
'make sure the tokenizer matches an end condition' |
|
33 |
if self.current[0] != m: |
|
34 |
raise SyntaxError(self.current) |
|
35 |
self._advance() |
|
36 |
def _parse(self, bind=0): |
|
37 |
token, value = self._advance() |
|
38 |
# handle prefix rules on current token |
|
39 |
prefix = self._elements[token][1] |
|
40 |
if not prefix: |
|
41 |
raise SyntaxError("not a prefix: %s" % token) |
|
42 |
if len(prefix) == 1: |
|
43 |
expr = (prefix[0], value) |
|
44 |
else: |
|
45 |
if len(prefix) > 2 and prefix[2] == self.current[0]: |
|
46 |
self._match(prefix[2]) |
|
47 |
expr = (prefix[0], None) |
|
48 |
else: |
|
49 |
expr = (prefix[0], self._parse(prefix[1])) |
|
50 |
if len(prefix) > 2: |
|
51 |
self._match(prefix[2]) |
|
52 |
# gather tokens until we meet a lower binding strength |
|
53 |
while bind < self._elements[self.current[0]][0]: |
|
54 |
token, value = self._advance() |
|
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
55 |
e = self._elements[token] |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
56 |
# check for suffix - next token isn't a valid prefix |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
57 |
if len(e) == 4 and not self._elements[self.current[0]][1]: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
58 |
suffix = e[3] |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
59 |
expr = (suffix[0], expr) |
11274 | 60 |
else: |
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
61 |
# handle infix rules |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
62 |
infix = self._elements[token][2] |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
63 |
if len(infix) == 3 and infix[2] == self.current[0]: |
11274 | 64 |
self._match(infix[2]) |
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
65 |
expr = (infix[0], expr, (None)) |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
66 |
else: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
67 |
if not infix[0]: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
68 |
raise SyntaxError("not an infix") |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
69 |
expr = (infix[0], expr, self._parse(infix[1])) |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
70 |
if len(infix) == 3: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
71 |
self._match(infix[2]) |
11274 | 72 |
return expr |
73 |
def parse(self, message): |
|
74 |
'generate a parse tree from a message' |
|
75 |
self._iter = self._tokenizer(message) |
|
76 |
self.current = self._iter.next() |
|
77 |
return self._parse() |
|
78 |
def eval(self, tree): |
|
79 |
'recursively evaluate a parse tree using node methods' |
|
80 |
if not isinstance(tree, tuple): |
|
81 |
return tree |
|
82 |
return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) |
|
83 |
def __call__(self, message): |
|
84 |
'parse a message into a parse tree and evaluate if methods given' |
|
85 |
t = self.parse(message) |
|
86 |
if self._methods: |
|
87 |
return self.eval(t) |
|
88 |
return t |