Mercurial > hg
view mercurial/parser.py @ 24443:539b3c7eea44
phase: compute phases in C
Previously, the phase computation would grow much slower as the oldest draft
commit in the repository grew older (which is very common in repos with evolve
on) and the number of commits increase.
By rewriting the computation in C we can speed it up from 700ms to 7ms on
a large repository whose oldest draft commit is a year old.
author | Laurent Charignon <lcharignon@fb.com> |
---|---|
date | Tue, 24 Mar 2015 11:00:09 -0700 |
parents | 7c4778bc29f0 |
children | d647f97f88dd |
line wrap: on
line source
# parser.py - simple top-down operator precedence parser for mercurial # # Copyright 2010 Matt Mackall <mpm@selenic.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. # see http://effbot.org/zone/simple-top-down-parsing.htm and # http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/ # for background # takes a tokenizer and elements # tokenizer is an iterator that returns type, value pairs # elements is a mapping of types to binding strength, prefix and infix actions # an action is a tree node name, a tree label, and an optional match # __call__(program) parses program into a labeled tree import error from i18n import _ class parser(object): def __init__(self, tokenizer, elements, methods=None): self._tokenizer = tokenizer self._elements = elements self._methods = methods self.current = None def _advance(self): 'advance the tokenizer' t = self.current try: self.current = self._iter.next() except StopIteration: pass return t def _match(self, m, pos): 'make sure the tokenizer matches an end condition' if self.current[0] != m: raise error.ParseError(_("unexpected token: %s") % self.current[0], self.current[2]) self._advance() def _parse(self, bind=0): token, value, pos = self._advance() # handle prefix rules on current token prefix = self._elements[token][1] if not prefix: raise error.ParseError(_("not a prefix: %s") % token, pos) if len(prefix) == 1: expr = (prefix[0], value) else: if len(prefix) > 2 and prefix[2] == self.current[0]: self._match(prefix[2], pos) expr = (prefix[0], None) else: expr = (prefix[0], self._parse(prefix[1])) if len(prefix) > 2: self._match(prefix[2], pos) # gather tokens until we meet a lower binding strength while bind < self._elements[self.current[0]][0]: token, value, pos = self._advance() e = self._elements[token] # check for suffix - next token isn't a valid prefix if len(e) == 4 and not self._elements[self.current[0]][1]: suffix = e[3] expr = (suffix[0], expr) else: # handle infix rules if len(e) < 3 or not e[2]: raise error.ParseError(_("not an infix: %s") % token, pos) infix = e[2] if len(infix) == 3 and infix[2] == self.current[0]: self._match(infix[2], pos) expr = (infix[0], expr, (None)) else: expr = (infix[0], expr, self._parse(infix[1])) if len(infix) == 3: self._match(infix[2], pos) return expr def parse(self, message, lookup=None): 'generate a parse tree from a message' if lookup: self._iter = self._tokenizer(message, lookup) else: self._iter = self._tokenizer(message) self._advance() res = self._parse() token, value, pos = self.current return res, pos def eval(self, tree): 'recursively evaluate a parse tree using node methods' if not isinstance(tree, tuple): return tree return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]]) def __call__(self, message): 'parse a message into a parse tree and evaluate if methods given' t = self.parse(message) if self._methods: return self.eval(t) return t