view mercurial/parser.py @ 23724:aafeaba22826 stable

revset: drop pre-lazyset optimization for stringset of subset == entire repo It was introduced at e44ebd2a142a, where spanset.__contains__() did not exist. Nowadays, we have to pay huge penalty for len(subset). The following example showed that OR operation could be O(n * m^2) (n: len(repo), m: number of OR operators, m >= 2) probably because of filteredset.__len__. revset #0: 0|1|2|3|4|5|6|7|8|9 0) wall 8.092713 comb 8.090000 user 8.090000 sys 0.000000 (best of 3) 1) wall 0.445354 comb 0.450000 user 0.430000 sys 0.020000 (best of 22) 2) wall 0.000389 comb 0.000000 user 0.000000 sys 0.000000 (best of 7347) (0: 3.2.4, 1: 3.1.2, 2: this patch)
author Yuya Nishihara <yuya@tcha.org>
date Sat, 03 Jan 2015 10:25:08 +0900
parents 7c4778bc29f0
children d647f97f88dd
line wrap: on
line source

# parser.py - simple top-down operator precedence parser for mercurial
#
# Copyright 2010 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

# see http://effbot.org/zone/simple-top-down-parsing.htm and
# http://eli.thegreenplace.net/2010/01/02/top-down-operator-precedence-parsing/
# for background

# takes a tokenizer and elements
# tokenizer is an iterator that returns type, value pairs
# elements is a mapping of types to binding strength, prefix and infix actions
# an action is a tree node name, a tree label, and an optional match
# __call__(program) parses program into a labeled tree

import error
from i18n import _

class parser(object):
    def __init__(self, tokenizer, elements, methods=None):
        self._tokenizer = tokenizer
        self._elements = elements
        self._methods = methods
        self.current = None
    def _advance(self):
        'advance the tokenizer'
        t = self.current
        try:
            self.current = self._iter.next()
        except StopIteration:
            pass
        return t
    def _match(self, m, pos):
        'make sure the tokenizer matches an end condition'
        if self.current[0] != m:
            raise error.ParseError(_("unexpected token: %s") % self.current[0],
                                   self.current[2])
        self._advance()
    def _parse(self, bind=0):
        token, value, pos = self._advance()
        # handle prefix rules on current token
        prefix = self._elements[token][1]
        if not prefix:
            raise error.ParseError(_("not a prefix: %s") % token, pos)
        if len(prefix) == 1:
            expr = (prefix[0], value)
        else:
            if len(prefix) > 2 and prefix[2] == self.current[0]:
                self._match(prefix[2], pos)
                expr = (prefix[0], None)
            else:
                expr = (prefix[0], self._parse(prefix[1]))
                if len(prefix) > 2:
                    self._match(prefix[2], pos)
        # gather tokens until we meet a lower binding strength
        while bind < self._elements[self.current[0]][0]:
            token, value, pos = self._advance()
            e = self._elements[token]
            # check for suffix - next token isn't a valid prefix
            if len(e) == 4 and not self._elements[self.current[0]][1]:
                suffix = e[3]
                expr = (suffix[0], expr)
            else:
                # handle infix rules
                if len(e) < 3 or not e[2]:
                    raise error.ParseError(_("not an infix: %s") % token, pos)
                infix = e[2]
                if len(infix) == 3 and infix[2] == self.current[0]:
                    self._match(infix[2], pos)
                    expr = (infix[0], expr, (None))
                else:
                    expr = (infix[0], expr, self._parse(infix[1]))
                    if len(infix) == 3:
                        self._match(infix[2], pos)
        return expr
    def parse(self, message, lookup=None):
        'generate a parse tree from a message'
        if lookup:
            self._iter = self._tokenizer(message, lookup)
        else:
            self._iter = self._tokenizer(message)
        self._advance()
        res = self._parse()
        token, value, pos = self.current
        return res, pos
    def eval(self, tree):
        'recursively evaluate a parse tree using node methods'
        if not isinstance(tree, tuple):
            return tree
        return self._methods[tree[0]](*[self.eval(t) for t in tree[1:]])
    def __call__(self, message):
        'parse a message into a parse tree and evaluate if methods given'
        t = self.parse(message)
        if self._methods:
            return self.eval(t)
        return t