Mercurial > hg
annotate mercurial/parser.py @ 17441:cb12d3ce5607 stable
subrepo: encode unicode path names (issue3610)
Subversion 1.7 changes its XML output to include an explicit encoding tag:
<?xml version="1.0" encoding="UTF-8"?>
This triggers xml.dom.minidom to always return unicode strings, causing
other parts of the code to explode.
We unconditionally encode path names before handing them back, which
works with both str (actually a no-op) and unicode values.
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Tue, 04 Sep 2012 15:46:04 -0700 |
parents | 4b93bd041772 |
children | 8ac8db8dc346 |
rev | line source |
---|---|
11274 | 1 # parser.py - simple top-down operator precedence parser for mercurial |
2 # | |
3 # Copyright 2010 Matt Mackall <mpm@selenic.com> | |
4 # | |
5 # This software may be used and distributed according to the terms of the | |
6 # GNU General Public License version 2 or any later version. | |
7 | |
11449
05af334bac05
parser: fix URL to effbot
Julian Cowley <julian@lava.net>
parents:
11412
diff
changeset
|
8 # see http://effbot.org/zone/simple-top-down-parsing.htm and |
11274 | 9 # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ |
10 # for background | |
11 | |
12 # takes a tokenizer and elements | |
13 # tokenizer is a callable that takes the program and returns an iterator of (type, value, pos) tuples | |
14 # elements is a mapping of types to binding strength, prefix and infix actions | |
15 # an action is a tree node name, a binding strength for its operand, and an optional closing token to match | |
16 # __call__(program) parses program into a labelled tree | |
17 | |
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
18 import error |
14701
4b93bd041772
parsers: fix localization markup of parser errors
Mads Kiilerich <mads@kiilerich.com>
parents:
13665
diff
changeset
|
19 from i18n import _ |
11289
4215ce511134
revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents:
11278
diff
changeset
|
20 |
class parser(object):
    """Simple top-down operator precedence (Pratt) parser.

    tokenizer is a callable taking the program text and returning an
    iterator of (type, value, pos) tuples.  The parser looks one token
    ahead, so the stream is expected to finish with a token whose
    binding strength stops the parse (e.g. an 'end' token with
    strength 0).

    elements maps each token type to a sequence:

      [0] binding strength of the token when it follows an expression
      [1] prefix action, or a false value if the token cannot start an
          expression
      [2] infix action, or a false value (optional)
      [3] suffix action (optional); used when the token following the
          operator cannot start an expression

    A prefix action is (name,) for a leaf node, or
    (name, bind[, closer]) for an operator whose operand is parsed with
    binding strength bind and, if closer is given, must be followed by
    a token of type closer.  An infix action is (name, bind[, closer])
    and a suffix action is (name,).

    methods optionally maps node names to callables used by eval().
    """

    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
        # lookahead token: the next (type, value, pos) to be consumed
        self.current = None

    def _advance(self):
        """Consume the lookahead token and return it.

        When the token stream is exhausted the lookahead is left
        unchanged, so the final token keeps being reported.
        """
        t = self.current
        try:
            # builtin next() instead of the Python 2 only .next() method
            self.current = next(self._iter)
        except StopIteration:
            pass
        return t

    def _match(self, m, pos):
        """Consume the lookahead token, which must be of type m.

        Raises error.ParseError, located at the lookahead token, if the
        type differs.  pos is accepted for the callers' convenience but
        is not used.
        """
        if self.current[0] != m:
            raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                   self.current[2])
        self._advance()

    def _parse(self, bind=0):
        """Parse one expression and return it as a nested tuple.

        Tokens are gathered while the lookahead token binds more
        tightly than bind.  Raises error.ParseError when a token is
        used in a position its element entry does not support.
        """
        token, value, pos = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise error.ParseError(_("not a prefix: %s") % token, pos)
        if len(prefix) == 1:
            # leaf node: keep the token's value
            expr = (prefix[0], value)
        else:
            if len(prefix) > 2 and prefix[2] == self.current[0]:
                # closer follows immediately: empty operand
                self._match(prefix[2], pos)
                expr = (prefix[0], None)
            else:
                expr = (prefix[0], self._parse(prefix[1]))
                if len(prefix) > 2:
                    self._match(prefix[2], pos)
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
            else:
                # handle infix rules
                if len(e) < 3 or not e[2]:
                    raise error.ParseError(_("not an infix: %s") % token, pos)
                infix = e[2]
                if len(infix) == 3 and infix[2] == self.current[0]:
                    # closer follows immediately: empty right operand
                    self._match(infix[2], pos)
                    expr = (infix[0], expr, None)
                else:
                    expr = (infix[0], expr, self._parse(infix[1]))
                    if len(infix) == 3:
                        self._match(infix[2], pos)
        return expr

    def parse(self, message):
        """Generate a parse tree from a message.

        Returns (tree, pos) where pos is the position of the first
        token that was not consumed; callers can compare it with the
        input length to detect trailing garbage.
        """
        self._iter = self._tokenizer(message)
        # prime the lookahead with the first token
        self._advance()
        res = self._parse()
        pos = self.current[2]
        return res, pos

    def eval(self, tree):
        """Recursively evaluate a parse tree using node methods.

        Non-tuple values are returned unchanged; a tuple node is
        evaluated by calling the method registered under its name with
        the evaluated children as arguments.
        """
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])

    def __call__(self, message):
        """Parse a message and evaluate the tree if methods were given.

        Without methods the (tree, pos) pair from parse() is returned.
        With methods only the tree is evaluated: parse() returns a
        (tree, pos) pair, and feeding that pair to eval() would use the
        whole tree as a method name.
        """
        t = self.parse(message)
        if self._methods:
            tree, pos = t
            return self.eval(tree)
        return t
95 return t |