author | Patrick Mezard <pmezard@gmail.com> |
Thu, 18 Nov 2010 23:15:13 +0100 | |
changeset 13018 | 96956105e92d |
parent 11449 | 05af334bac05 |
child 13176 | 895f54a79c6e |
permissions | -rw-r--r-- |
11274 | 1 |
# parser.py - simple top-down operator precedence parser for mercurial |
2 |
# |
|
3 |
# Copyright 2010 Matt Mackall <mpm@selenic.com> |
|
4 |
# |
|
5 |
# This software may be used and distributed according to the terms of the |
|
6 |
# GNU General Public License version 2 or any later version. |
|
7 |
||
11449
05af334bac05
parser: fix URL to effbot
Julian Cowley <julian@lava.net>
parents:
11412
diff
changeset
|
8 |
# see http://effbot.org/zone/simple-top-down-parsing.htm and |
11274 | 9 |
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
10 |
# for background |
|
11 |
||
12 |
# takes a tokenizer and elements |
|
13 |
# tokenizer is an iterator that returns type, value pairs |
|
14 |
# elements is a mapping of types to binding strength, prefix and infix actions |
|
15 |
# an action is a tree node name, a tree label, and an optional match |
|
16 |
# __call__(program) parses program into a labelled tree |
|
17 |
||
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
18 |
import error |
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
19 |
|
11274 | 20 |
class parser(object): |
21 |
def __init__(self, tokenizer, elements, methods=None): |
|
22 |
self._tokenizer = tokenizer |
|
23 |
self._elements = elements |
|
24 |
self._methods = methods |
|
25 |
def _advance(self): |
|
26 |
'advance the tokenizer' |
|
27 |
t = self.current |
|
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
28 |
try: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
29 |
self.current = self._iter.next() |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
30 |
except StopIteration: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
31 |
pass |
11274 | 32 |
return t |
11319
9d1cf337a78d
parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents:
11305
diff
changeset
|
33 |
def _match(self, m, pos): |
11274 | 34 |
'make sure the tokenizer matches an end condition' |
35 |
if self.current[0] != m: |
|
11305
d4cafcb63f77
cleanups: undefined variables
Dirkjan Ochtman <dirkjan@ochtman.nl>
parents:
11289
diff
changeset
|
36 |
raise error.ParseError("unexpected token: %s" % self.current[0], |
d4cafcb63f77
cleanups: undefined variables
Dirkjan Ochtman <dirkjan@ochtman.nl>
parents:
11289
diff
changeset
|
37 |
self.current[2]) |
11274 | 38 |
self._advance() |
39 |
def _parse(self, bind=0): |
|
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
40 |
token, value, pos = self._advance() |
11274 | 41 |
# handle prefix rules on current token |
42 |
prefix = self._elements[token][1] |
|
43 |
if not prefix: |
|
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
44 |
raise error.ParseError("not a prefix: %s" % token, pos) |
11274 | 45 |
if len(prefix) == 1: |
46 |
expr = (prefix[0], value) |
|
47 |
else: |
|
48 |
if len(prefix) > 2 and prefix[2] == self.current[0]: |
|
11319
9d1cf337a78d
parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents:
11305
diff
changeset
|
49 |
self._match(prefix[2], pos) |
11274 | 50 |
expr = (prefix[0], None) |
51 |
else: |
|
52 |
expr = (prefix[0], self._parse(prefix[1])) |
|
53 |
if len(prefix) > 2: |
|
11319
9d1cf337a78d
parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents:
11305
diff
changeset
|
54 |
self._match(prefix[2], pos) |
11274 | 55 |
# gather tokens until we meet a lower binding strength |
56 |
while bind < self._elements[self.current[0]][0]: |
|
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
57 |
token, value, pos = self._advance() |
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
58 |
e = self._elements[token] |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
59 |
# check for suffix - next token isn't a valid prefix |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
60 |
if len(e) == 4 and not self._elements[self.current[0]][1]: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
61 |
suffix = e[3] |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
62 |
expr = (suffix[0], expr) |
11274 | 63 |
else: |
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
64 |
# handle infix rules |
11412
51ceb1571805
parser: improve infix error checking
Matt Mackall <mpm@selenic.com>
parents:
11319
diff
changeset
|
65 |
if len(e) < 3 or not e[2]: |
51ceb1571805
parser: improve infix error checking
Matt Mackall <mpm@selenic.com>
parents:
11319
diff
changeset
|
66 |
raise error.ParseError("not an infix: %s" % token, pos) |
51ceb1571805
parser: improve infix error checking
Matt Mackall <mpm@selenic.com>
parents:
11319
diff
changeset
|
67 |
infix = e[2] |
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
68 |
if len(infix) == 3 and infix[2] == self.current[0]: |
11319
9d1cf337a78d
parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents:
11305
diff
changeset
|
69 |
self._match(infix[2], pos) |
11278
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
70 |
expr = (infix[0], expr, (None)) |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
71 |
else: |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
72 |
expr = (infix[0], expr, self._parse(infix[1])) |
7df88cdf47fd
revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents:
11274
diff
changeset
|
73 |
if len(infix) == 3: |
11319
9d1cf337a78d
parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents:
11305
diff
changeset
|
74 |
self._match(infix[2], pos) |
11274 | 75 |
return expr |
76 |
def parse(self, message): |
|
77 |
'generate a parse tree from a message' |
|
78 |
self._iter = self._tokenizer(message) |
|
79 |
self.current = self._iter.next() |
|
80 |
return self._parse() |
|
81 |
def eval(self, tree): |
|
82 |
'recursively evaluate a parse tree using node methods' |
|
83 |
if not isinstance(tree, tuple): |
|
84 |
return tree |
|
85 |
return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) |
|
86 |
def __call__(self, message): |
|
87 |
'parse a message into a parse tree and evaluate if methods given' |
|
88 |
t = self.parse(message) |
|
89 |
if self._methods: |
|
90 |
return self.eval(t) |
|
91 |
return t |