mercurial/filesetlang.py
author Yuya Nishihara <yuya@tcha.org>
Sat, 21 Jul 2018 17:13:34 +0900
changeset 38864 73731fa8d1bd
parent 38863 61ab546b71c3
child 38865 899b4c74209c
permissions -rw-r--r--
fileset: reorder 'or' expression by weight
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
38805
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38804
diff changeset
     1
# filesetlang.py - parser, tokenizer and utility for file set language
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     2
#
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     3
# Copyright 2010 Matt Mackall <mpm@selenic.com>
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     4
#
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     5
# This software may be used and distributed according to the terms of the
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     6
# GNU General Public License version 2 or any later version.
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
     7
25938
e194ada8d45f fileset: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25815
diff changeset
     8
from __future__ import absolute_import
e194ada8d45f fileset: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25815
diff changeset
     9
e194ada8d45f fileset: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25815
diff changeset
    10
from .i18n import _
e194ada8d45f fileset: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25815
diff changeset
    11
from . import (
e194ada8d45f fileset: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25815
diff changeset
    12
    error,
e194ada8d45f fileset: use absolute_import
Gregory Szorc <gregory.szorc@gmail.com>
parents: 25815
diff changeset
    13
    parser,
32523
1fb0a85fb20e py3: use pycompat.bytestr so that we don't get ascii values
Pulkit Goyal <7895pulkit@gmail.com>
parents: 32291
diff changeset
    14
    pycompat,
37084
f0b6fbea00cf stringutil: bulk-replace call sites to point to new module
Yuya Nishihara <yuya@tcha.org>
parents: 36505
diff changeset
    15
)
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    16
38863
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
    17
# common weight constants for static optimization
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
    18
# (see registrar.filesetpredicate for details)
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
    19
WEIGHT_CHECK_FILENAME = 0.5
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
    20
WEIGHT_READ_CONTENTS = 30
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
    21
WEIGHT_STATUS = 10
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
    22
WEIGHT_STATUS_THOROUGH = 50
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
    23
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    24
elements = {
25815
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    25
    # token-type: binding-strength, primary, prefix, infix, suffix
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    26
    "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
35741
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
    27
    ":": (15, None, None, ("kindpat", 15), None),
25815
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    28
    "-": (5, None, ("negate", 19), ("minus", 5), None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    29
    "not": (10, None, ("not", 10), None, None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    30
    "!": (10, None, ("not", 10), None, None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    31
    "and": (5, None, None, ("and", 5), None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    32
    "&": (5, None, None, ("and", 5), None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    33
    "or": (4, None, None, ("or", 4), None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    34
    "|": (4, None, None, ("or", 4), None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    35
    "+": (4, None, None, ("or", 4), None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    36
    ",": (2, None, None, ("list", 2), None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    37
    ")": (0, None, None, None, None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    38
    "symbol": (0, "symbol", None, None, None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    39
    "string": (0, "string", None, None, None),
e71e5629e006 parser: separate actions for primary expression and prefix operator
Yuya Nishihara <yuya@tcha.org>
parents: 25801
diff changeset
    40
    "end": (0, None, None, None, None),
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    41
}
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    42
32291
bd872f64a8ba cleanup: use set literals
Martin von Zweigbergk <martinvonz@google.com>
parents: 32134
diff changeset
    43
keywords = {'and', 'or', 'not'}
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    44
38805
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38804
diff changeset
    45
symbols = {}
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38804
diff changeset
    46
19470
19ac0d8ee9a2 fileset: handle underbar in symbols
Matt Mackall <mpm@selenic.com>
parents: 19194
diff changeset
    47
globchars = ".*{}[]?/\\_"
14551
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
    48
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    49
def tokenize(program):
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    50
    pos, l = 0, len(program)
32523
1fb0a85fb20e py3: use pycompat.bytestr so that we don't get ascii values
Pulkit Goyal <7895pulkit@gmail.com>
parents: 32291
diff changeset
    51
    program = pycompat.bytestr(program)
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    52
    while pos < l:
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    53
        c = program[pos]
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    54
        if c.isspace(): # skip inter-token whitespace
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    55
            pass
35741
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
    56
        elif c in "(),-:|&+!": # handle simple operators
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11284
diff changeset
    57
            yield (c, None, pos)
12408
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    58
        elif (c in '"\'' or c == 'r' and
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    59
              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    60
            if c == 'r':
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    61
                pos += 1
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    62
                c = program[pos]
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    63
                decode = lambda x: x
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    64
            else:
26233
d3dbb65c8dc6 fileset: handle error of string unescaping
Yuya Nishihara <yuya@tcha.org>
parents: 26195
diff changeset
    65
                decode = parser.unescapestr
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    66
            pos += 1
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    67
            s = pos
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    68
            while pos < l: # find closing quote
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    69
                d = program[pos]
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    70
                if d == '\\': # skip over escaped characters
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    71
                    pos += 2
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    72
                    continue
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    73
                if d == c:
12408
78a97859b90d revset: support raw string literals
Brodie Rao <brodie@bitheap.org>
parents: 12401
diff changeset
    74
                    yield ('string', decode(program[s:pos]), s)
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    75
                    break
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    76
                pos += 1
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    77
            else:
11383
de544774ebea revset: all your error messages are belong to _
Martin Geisler <mg@lazybytes.net>
parents: 11349
diff changeset
    78
                raise error.ParseError(_("unterminated string"), s)
14551
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
    79
        elif c.isalnum() or c in globchars or ord(c) > 127:
14513
85fe676c27e9 fileset: fix long line
Matt Mackall <mpm@selenic.com>
parents: 14511
diff changeset
    80
            # gather up a symbol/keyword
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    81
            s = pos
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    82
            pos += 1
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    83
            while pos < l: # find end of symbol
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    84
                d = program[pos]
14551
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
    85
                if not (d.isalnum() or d in globchars or ord(d) > 127):
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    86
                    break
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    87
                pos += 1
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    88
            sym = program[s:pos]
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    89
            if sym in keywords: # operator keywords
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11284
diff changeset
    90
                yield (sym, None, s)
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    91
            else:
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11284
diff changeset
    92
                yield ('symbol', sym, s)
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    93
            pos -= 1
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    94
        else:
11383
de544774ebea revset: all your error messages are belong to _
Martin Geisler <mg@lazybytes.net>
parents: 11349
diff changeset
    95
            raise error.ParseError(_("syntax error"), pos)
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    96
        pos += 1
11289
4215ce511134 revset: raise ParseError exceptions
Matt Mackall <mpm@selenic.com>
parents: 11284
diff changeset
    97
    yield ('end', None, pos)
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
    98
20208
61a47fd64f30 fileset, revset: do not use global parser object for thread safety
Yuya Nishihara <yuya@tcha.org>
parents: 19470
diff changeset
    99
def parse(expr):
25654
af329a84310c parser: accept iterator of tokens instead of tokenizer function and program
Yuya Nishihara <yuya@tcha.org>
parents: 25633
diff changeset
   100
    p = parser.parser(elements)
af329a84310c parser: accept iterator of tokens instead of tokenizer function and program
Yuya Nishihara <yuya@tcha.org>
parents: 25633
diff changeset
   101
    tree, pos = p.parse(tokenize(expr))
25252
ac381dd7a21f fileset: move validation of incomplete parsing to parse() function
Yuya Nishihara <yuya@tcha.org>
parents: 24408
diff changeset
   102
    if pos != len(expr):
ac381dd7a21f fileset: move validation of incomplete parsing to parse() function
Yuya Nishihara <yuya@tcha.org>
parents: 24408
diff changeset
   103
        raise error.ParseError(_("invalid token"), pos)
38804
d82c4d42b615 fileset: flatten 'or' nodes to unnest unionmatchers
Yuya Nishihara <yuya@tcha.org>
parents: 38803
diff changeset
   104
    return parser.simplifyinfixops(tree, {'list', 'or'})
11275
c9ce8ecd6ca1 revset: introduce revset core
Matt Mackall <mpm@selenic.com>
parents:
diff changeset
   105
35691
735f47b41521 fileset: make it robust for bad function calls
Yuya Nishihara <yuya@tcha.org>
parents: 35615
diff changeset
   106
def getsymbol(x):
735f47b41521 fileset: make it robust for bad function calls
Yuya Nishihara <yuya@tcha.org>
parents: 35615
diff changeset
   107
    if x and x[0] == 'symbol':
735f47b41521 fileset: make it robust for bad function calls
Yuya Nishihara <yuya@tcha.org>
parents: 35615
diff changeset
   108
        return x[1]
735f47b41521 fileset: make it robust for bad function calls
Yuya Nishihara <yuya@tcha.org>
parents: 35615
diff changeset
   109
    raise error.ParseError(_('not a symbol'))
735f47b41521 fileset: make it robust for bad function calls
Yuya Nishihara <yuya@tcha.org>
parents: 35615
diff changeset
   110
14551
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
   111
def getstring(x, err):
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
   112
    if x and (x[0] == 'string' or x[0] == 'symbol'):
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
   113
        return x[1]
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
   114
    raise error.ParseError(err)
68d814a3cefd fileset: basic pattern and boolean support
Matt Mackall <mpm@selenic.com>
parents: 14513
diff changeset
   115
38805
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38804
diff changeset
   116
def getkindpat(x, y, allkinds, err):
35741
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   117
    kind = getsymbol(x)
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   118
    pat = getstring(y, err)
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   119
    if kind not in allkinds:
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   120
        raise error.ParseError(_("invalid pattern kind: %s") % kind)
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   121
    return '%s:%s' % (kind, pat)
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   122
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   123
def getpattern(x, allkinds, err):
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   124
    if x and x[0] == 'kindpat':
38805
b9162ea1b815 fileset: extract language processing part to new module (API)
Yuya Nishihara <yuya@tcha.org>
parents: 38804
diff changeset
   125
        return getkindpat(x[1], x[2], allkinds, err)
35741
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   126
    return getstring(x, err)
73432eee0ac4 fileset: add kind:pat operator
Yuya Nishihara <yuya@tcha.org>
parents: 35739
diff changeset
   127
38598
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   128
def getlist(x):
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   129
    if not x:
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   130
        return []
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   131
    if x[0] == 'list':
38803
4dc498d61d86 fileset: flatten arguments list
Yuya Nishihara <yuya@tcha.org>
parents: 38772
diff changeset
   132
        return list(x[1:])
38598
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   133
    return [x]
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   134
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   135
def getargs(x, min, max, err):
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   136
    l = getlist(x)
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   137
    if len(l) < min or len(l) > max:
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   138
        raise error.ParseError(err)
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   139
    return l
d046bf37f1ba fileset: move helper functions to top
Yuya Nishihara <yuya@tcha.org>
parents: 38414
diff changeset
   140
38826
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   141
def _analyze(x):
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   142
    if x is None:
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   143
        return x
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   144
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   145
    op = x[0]
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   146
    if op in {'string', 'symbol'}:
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   147
        return x
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   148
    if op == 'kindpat':
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   149
        getsymbol(x[1])  # kind must be a symbol
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   150
        t = _analyze(x[2])
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   151
        return (op, x[1], t)
38827
48fc2a8af345 fileset: drop 'group' node from tree to be evaluated
Yuya Nishihara <yuya@tcha.org>
parents: 38826
diff changeset
   152
    if op == 'group':
48fc2a8af345 fileset: drop 'group' node from tree to be evaluated
Yuya Nishihara <yuya@tcha.org>
parents: 38826
diff changeset
   153
        return _analyze(x[1])
38828
3ea6ce609747 fileset: reject 'negate' node early while transforming parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38827
diff changeset
   154
    if op == 'negate':
3ea6ce609747 fileset: reject 'negate' node early while transforming parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38827
diff changeset
   155
        raise error.ParseError(_("can't use negate operator in this context"))
3ea6ce609747 fileset: reject 'negate' node early while transforming parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38827
diff changeset
   156
    if op == 'not':
38826
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   157
        t = _analyze(x[1])
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   158
        return (op, t)
38832
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   159
    if op == 'and':
38826
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   160
        ta = _analyze(x[1])
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   161
        tb = _analyze(x[2])
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   162
        return (op, ta, tb)
38832
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   163
    if op == 'minus':
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   164
        return _analyze(('and', x[1], ('not', x[2])))
38826
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   165
    if op in {'list', 'or'}:
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   166
        ts = tuple(_analyze(y) for y in x[1:])
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   167
        return (op,) + ts
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   168
    if op == 'func':
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   169
        getsymbol(x[1])  # function name must be a symbol
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   170
        ta = _analyze(x[2])
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   171
        return (op, x[1], ta)
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   172
    raise error.ProgrammingError('invalid operator %r' % op)
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   173
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   174
def analyze(x):
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   175
    """Transform raw parsed tree to evaluatable tree which can be fed to
38829
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   176
    optimize() or getmatch()
38826
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   177
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   178
    All pseudo operations should be mapped to real operations or functions
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   179
    defined in methods or symbols table respectively.
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   180
    """
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   181
    return _analyze(x)
6371ab78c3b3 fileset: add phase to transform parsed tree
Yuya Nishihara <yuya@tcha.org>
parents: 38805
diff changeset
   182
38832
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   183
def _optimizeandops(op, ta, tb):
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   184
    if tb is not None and tb[0] == 'not':
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   185
        return ('minus', ta, tb[1])
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   186
    return (op, ta, tb)
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   187
38829
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   188
def _optimize(x):
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   189
    if x is None:
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   190
        return 0, x
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   191
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   192
    op = x[0]
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   193
    if op in {'string', 'symbol'}:
38863
61ab546b71c3 fileset: introduce weight constants for readability
Yuya Nishihara <yuya@tcha.org>
parents: 38832
diff changeset
   194
        return WEIGHT_CHECK_FILENAME, x
38829
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   195
    if op == 'kindpat':
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   196
        w, t = _optimize(x[2])
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   197
        return w, (op, x[1], t)
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   198
    if op == 'not':
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   199
        w, t = _optimize(x[1])
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   200
        return w, (op, t)
38831
b975c5801487 fileset: reorder 'and' expression to evaluate basic patterns first
Yuya Nishihara <yuya@tcha.org>
parents: 38829
diff changeset
   201
    if op == 'and':
b975c5801487 fileset: reorder 'and' expression to evaluate basic patterns first
Yuya Nishihara <yuya@tcha.org>
parents: 38829
diff changeset
   202
        wa, ta = _optimize(x[1])
b975c5801487 fileset: reorder 'and' expression to evaluate basic patterns first
Yuya Nishihara <yuya@tcha.org>
parents: 38829
diff changeset
   203
        wb, tb = _optimize(x[2])
b975c5801487 fileset: reorder 'and' expression to evaluate basic patterns first
Yuya Nishihara <yuya@tcha.org>
parents: 38829
diff changeset
   204
        if wa <= wb:
38832
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   205
            return wa, _optimizeandops(op, ta, tb)
38831
b975c5801487 fileset: reorder 'and' expression to evaluate basic patterns first
Yuya Nishihara <yuya@tcha.org>
parents: 38829
diff changeset
   206
        else:
38832
ca4de8ba5b5f fileset: optimize 'x and not y' to 'x - y'
Yuya Nishihara <yuya@tcha.org>
parents: 38831
diff changeset
   207
            return wb, _optimizeandops(op, tb, ta)
38829
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   208
    if op == 'or':
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   209
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
38864
73731fa8d1bd fileset: reorder 'or' expression by weight
Yuya Nishihara <yuya@tcha.org>
parents: 38863
diff changeset
   210
        ts = tuple(it[1] for it in sorted(enumerate(ts),
73731fa8d1bd fileset: reorder 'or' expression by weight
Yuya Nishihara <yuya@tcha.org>
parents: 38863
diff changeset
   211
                                          key=lambda it: ws[it[0]]))
38829
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   212
        return max(ws), (op,) + ts
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   213
    if op == 'list':
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   214
        ws, ts = zip(*(_optimize(y) for y in x[1:]))
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   215
        return sum(ws), (op,) + ts
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   216
    if op == 'func':
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   217
        f = getsymbol(x[1])
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   218
        w = getattr(symbols.get(f), '_weight', 1)
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   219
        wa, ta = _optimize(x[2])
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   220
        return w + wa, (op, x[1], ta)
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   221
    raise error.ProgrammingError('invalid operator %r' % op)
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   222
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   223
def optimize(x):
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   224
    """Reorder/rewrite evaluatable tree for optimization
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   225
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   226
    All pseudo operations should be transformed beforehand.
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   227
    """
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   228
    _w, t = _optimize(x)
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   229
    return t
7e7e2b2ff284 fileset: add stub for weight-based optimization
Yuya Nishihara <yuya@tcha.org>
parents: 38828
diff changeset
   230
25255
ad1d2c952889 fileset: pretty print syntax tree in debug output
Yuya Nishihara <yuya@tcha.org>
parents: 25252
diff changeset
   231
def prettyformat(tree):
ad1d2c952889 fileset: pretty print syntax tree in debug output
Yuya Nishihara <yuya@tcha.org>
parents: 25252
diff changeset
   232
    return parser.prettyformat(tree, ('string', 'symbol'))