view mercurial/parser.py @ 23324:69f86b937035

cmdserver: protect pipe server streams against corruption caused by direct io Because pipe-mode server uses stdio as IPC channel, other modules should not touch stdio directly and use ui instead. However, this strategy is brittle because several Python functions read and write stdio implicitly. print 'hello' # should use ui.write() # => ch = 'h', size = 1701604463 'ello', data = '\n' This patch adds protection for such mistakes. Both stdio files and low-level file descriptors are redirected to /dev/null while command server uses them.
author Yuya Nishihara <yuya@tcha.org>
date Sat, 15 Nov 2014 13:50:43 +0900
parents 7c4778bc29f0
children d647f97f88dd
line wrap: on
line source

# parser.py - simple top-down operator precedence parser for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# see http://effbot.org/zone/simple-top-down-parsing.htm and
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
# for background

# takes a tokenizer and elements
# tokenizer is an iterator that returns type, value pairs
# elements is a mapping of types to binding strength, prefix and infix actions
# an action is a tree node name, a tree label, and an optional match
# __call__(program) parses program into a labeled tree

import error
from i18n import _

class parser(object):
    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
        self.current = None
    def _advance(self):
        'advance the tokenizer'
        t = self.current
        try:
            self.current = self._iter.next()
        except StopIteration:
            pass
        return t
    def _match(self, m, pos):
        'make sure the tokenizer matches an end condition'
        if self.current[0] != m:
            raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                   self.current[2])
        self._advance()
    def _parse(self, bind=0):
        token, value, pos = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise error.ParseError(_("not a prefix: %s") % token, pos)
        if len(prefix) == 1:
            expr = (prefix[0], value)
        else:
            if len(prefix) > 2 and prefix[2] == self.current[0]:
                self._match(prefix[2], pos)
                expr = (prefix[0], None)
            else:
                expr = (prefix[0], self._parse(prefix[1]))
                if len(prefix) > 2:
                    self._match(prefix[2], pos)
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
            else:
                # handle infix rules
                if len(e) < 3 or not e[2]:
                    raise error.ParseError(_("not an infix: %s") % token, pos)
                infix = e[2]
                if len(infix) == 3 and infix[2] == self.current[0]:
                    self._match(infix[2], pos)
                    expr = (infix[0], expr, (None))
                else:
                    expr = (infix[0], expr, self._parse(infix[1]))
                    if len(infix) == 3:
                        self._match(infix[2], pos)
        return expr
    def parse(self, message, lookup=None):
        'generate a parse tree from a message'
        if lookup:
            self._iter = self._tokenizer(message, lookup)
        else:
            self._iter = self._tokenizer(message)
        self._advance()
        res = self._parse()
        token, value, pos = self.current
        return res, pos
    def eval(self, tree):
        'recursively evaluate a parse tree using node methods'
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
    def __call__(self, message):
        'parse a message into a parse tree and evaluate if methods given'
        t = self.parse(message)
        if self._methods:
            return self.eval(t)
        return t