annotate mercurial/parser.py @ 21812:73e4a02e6d23

hg: add support for HGUNICODEPEDANTRY environment variable This lets us easily verify that there are no implicit conversions between unicodes and bytes in Mercurial's codebase. Based on something mpm did by hand periodically, but it kept regressing, so just open the door to running it in a buildbot.
author Augie Fackler <raf@durin42.com>
date Mon, 23 Jun 2014 09:33:07 -0400
parents 7c4778bc29f0
children d647f97f88dd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
1 # parser.py - simple top-down operator precedence parser for mercurial
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
2 #
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
4 #
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
5 # This software may be used and distributed according to the terms of the
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
6 # GNU General Public License version 2 or any later version.
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
7
11449
05af334bac05 parser: fix URL to effbot
Julian Cowley <julian@lava.net>
parents: 11412
diff changeset
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
10 # for background
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
11
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
12 # takes a tokenizer and elements
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
13 # tokenizer is an iterator that returns type, value pairs
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
14 # elements is a mapping of types to binding strength, prefix and infix actions
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
15 # an action is a tree node name, a tree label, and an optional match
17500
8ac8db8dc346 en-us: labeled
timeless@mozdev.org
parents: 14701
diff changeset
16 # __call__(program) parses program into a labeled tree
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
17
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11278
diff changeset
18 import error
14701
4b93bd041772 parsers: fix localization markup of parser errors
Mads Kiilerich <mads@kiilerich.com>
parents: 13665
diff changeset
19 from i18n import _
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11278
diff changeset
20
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
21 class parser(object):
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
22 def __init__(self, tokenizer, elements, methods=None):
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
23 self._tokenizer = tokenizer
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
24 self._elements = elements
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
25 self._methods = methods
13176
895f54a79c6e templater: use the parser.py parser to extend the templater syntax
Matt Mackall <mpm@selenic.com>
parents: 11449
diff changeset
26 self.current = None
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
27 def _advance(self):
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
28 'advance the tokenizer'
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
29 t = self.current
11278
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
30 try:
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
31 self.current = self._iter.next()
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
32 except StopIteration:
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
33 pass
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
34 return t
11319
9d1cf337a78d parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 11305
diff changeset
35 def _match(self, m, pos):
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
36 'make sure the tokenizer matches an end condition'
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
37 if self.current[0] != m:
14701
4b93bd041772 parsers: fix localization markup of parser errors
Mads Kiilerich <mads@kiilerich.com>
parents: 13665
diff changeset
38 raise error.ParseError(_("unexpected token: %s") % self.current[0],
11305
d4cafcb63f77 cleanups: undefined variables
Dirkjan Ochtman <dirkjan@ochtman.nl>
parents: 11289
diff changeset
39 self.current[2])
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
40 self._advance()
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
41 def _parse(self, bind=0):
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11278
diff changeset
42 token, value, pos = self._advance()
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
43 # handle prefix rules on current token
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
44 prefix = self._elements[token][1]
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
45 if not prefix:
14701
4b93bd041772 parsers: fix localization markup of parser errors
Mads Kiilerich <mads@kiilerich.com>
parents: 13665
diff changeset
46 raise error.ParseError(_("not a prefix: %s") % token, pos)
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
47 if len(prefix) == 1:
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
48 expr = (prefix[0], value)
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
49 else:
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
50 if len(prefix) > 2 and prefix[2] == self.current[0]:
11319
9d1cf337a78d parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 11305
diff changeset
51 self._match(prefix[2], pos)
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
52 expr = (prefix[0], None)
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
53 else:
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
54 expr = (prefix[0], self._parse(prefix[1]))
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
55 if len(prefix) > 2:
11319
9d1cf337a78d parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 11305
diff changeset
56 self._match(prefix[2], pos)
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
57 # gather tokens until we meet a lower binding strength
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
58 while bind < self._elements[self.current[0]][0]:
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11278
diff changeset
59 token, value, pos = self._advance()
11278
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
60 e = self._elements[token]
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
61 # check for suffix - next token isn't a valid prefix
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
62 if len(e) == 4 and not self._elements[self.current[0]][1]:
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
63 suffix = e[3]
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
64 expr = (suffix[0], expr)
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
65 else:
11278
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
66 # handle infix rules
11412
51ceb1571805 parser: improve infix error checking
Matt Mackall <mpm@selenic.com>
parents: 11319
diff changeset
67 if len(e) < 3 or not e[2]:
14701
4b93bd041772 parsers: fix localization markup of parser errors
Mads Kiilerich <mads@kiilerich.com>
parents: 13665
diff changeset
68 raise error.ParseError(_("not an infix: %s") % token, pos)
11412
51ceb1571805 parser: improve infix error checking
Matt Mackall <mpm@selenic.com>
parents: 11319
diff changeset
69 infix = e[2]
11278
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
70 if len(infix) == 3 and infix[2] == self.current[0]:
11319
9d1cf337a78d parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 11305
diff changeset
71 self._match(infix[2], pos)
11278
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
72 expr = (infix[0], expr, (None))
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
73 else:
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
74 expr = (infix[0], expr, self._parse(infix[1]))
7df88cdf47fd revset: add support for prefix and suffix versions of : and ::
Matt Mackall <mpm@selenic.com>
parents: 11274
diff changeset
75 if len(infix) == 3:
11319
9d1cf337a78d parser: fix missing param in _match
Peter Arrenbrecht <peter.arrenbrecht@gmail.com>
parents: 11305
diff changeset
76 self._match(infix[2], pos)
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
77 return expr
20778
7c4778bc29f0 parser: allow passing a lookup function to a tokenizer
Matt Mackall <mpm@selenic.com>
parents: 17500
diff changeset
78 def parse(self, message, lookup=None):
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
79 'generate a parse tree from a message'
20778
7c4778bc29f0 parser: allow passing a lookup function to a tokenizer
Matt Mackall <mpm@selenic.com>
parents: 17500
diff changeset
80 if lookup:
7c4778bc29f0 parser: allow passing a lookup function to a tokenizer
Matt Mackall <mpm@selenic.com>
parents: 17500
diff changeset
81 self._iter = self._tokenizer(message, lookup)
7c4778bc29f0 parser: allow passing a lookup function to a tokenizer
Matt Mackall <mpm@selenic.com>
parents: 17500
diff changeset
82 else:
7c4778bc29f0 parser: allow passing a lookup function to a tokenizer
Matt Mackall <mpm@selenic.com>
parents: 17500
diff changeset
83 self._iter = self._tokenizer(message)
13176
895f54a79c6e templater: use the parser.py parser to extend the templater syntax
Matt Mackall <mpm@selenic.com>
parents: 11449
diff changeset
84 self._advance()
13665
e798e430c5e5 revset: report a parse error if a revset is not parsed completely (issue2654)
Bernhard Leiner <bleiner@gmail.com>
parents: 13176
diff changeset
85 res = self._parse()
e798e430c5e5 revset: report a parse error if a revset is not parsed completely (issue2654)
Bernhard Leiner <bleiner@gmail.com>
parents: 13176
diff changeset
86 token, value, pos = self.current
e798e430c5e5 revset: report a parse error if a revset is not parsed completely (issue2654)
Bernhard Leiner <bleiner@gmail.com>
parents: 13176
diff changeset
87 return res, pos
11274
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
88 def eval(self, tree):
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
89 'recursively evaluate a parse tree using node methods'
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
90 if not isinstance(tree, tuple):
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
91 return tree
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
92 return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
93 def __call__(self, message):
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
94 'parse a message into a parse tree and evaluate if methods given'
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
95 t = self.parse(message)
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
96 if self._methods:
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
97 return self.eval(t)
77272d28b53f revset: introduce basic parser
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
98 return t