comparison mercurial/filesetlang.py @ 38845:b9162ea1b815

fileset: extract language processing part to new module (API)

I'll add a couple more functions that work on parsed tree.

  % wc -l mercurial/fileset*.py
   559 mercurial/fileset.py
   135 mercurial/filesetlang.py
   694 total
author Yuya Nishihara <yuya@tcha.org>
date Sun, 22 Jul 2018 15:50:45 +0900
parents mercurial/fileset.py@d82c4d42b615
children 6371ab78c3b3
comparison
equal deleted inserted replaced
38844:d82c4d42b615 38845:b9162ea1b815
1 # filesetlang.py - parser, tokenizer and utility for file set language
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 from __future__ import absolute_import
9
10 from .i18n import _
11 from . import (
12 error,
13 parser,
14 pycompat,
15 )
16
# Operator table handed to parser.parser() by parse().
elements = {
    # token-type: binding-strength, primary, prefix, infix, suffix
    '(': (20, None, ('group', 1, ')'), ('func', 1, ')'), None),
    ':': (15, None, None, ('kindpat', 15), None),
    '-': (5, None, ('negate', 19), ('minus', 5), None),
    ',': (2, None, None, ('list', 2), None),
    ')': (0, None, None, None, None),
    'symbol': (0, 'symbol', None, None, None),
    'string': (0, 'string', None, None, None),
    'end': (0, None, None, None, None),
}
# The boolean operators have several spellings; every spelling of one
# operator maps to the same entry.
elements.update(dict.fromkeys(
    ('not', '!'), (10, None, ('not', 10), None, None)))
elements.update(dict.fromkeys(
    ('and', '&'), (5, None, None, ('and', 5), None)))
elements.update(dict.fromkeys(
    ('or', '|', '+'), (4, None, None, ('or', 4), None)))

# Symbols that tokenize() emits as operator tokens instead of 'symbol'.
keywords = {'not', 'and', 'or'}

# Starts out empty; nothing in this part of the file fills or reads it.
symbols = {}

# Non-alphanumeric characters that tokenize() accepts inside a symbol.
globchars = '.*{}[]?/\\_'
41
def tokenize(program):
    """Generate (type, value, position) tokens for a fileset expression.

    Single-character operators and the words listed in ``keywords`` are
    yielded with their own text as the type and None as the value.
    Quoted strings yield 'string' tokens and every other run of symbol
    characters yields a 'symbol' token.  A final ('end', None, pos) token
    is always produced after the input is exhausted.

    Raises error.ParseError for an unterminated string (reported at the
    position just after the opening quote) and for any character that
    cannot start a token.
    """
    def issymchar(ch):
        # alphanumerics, glob characters and non-ASCII bytes form symbols
        return ch.isalnum() or ch in globchars or ord(ch) > 127

    # the length is taken from the argument as passed, before conversion
    end = len(program)
    program = pycompat.bytestr(program)
    cur = 0
    while cur < end:
        ch = program[cur]

        if ch.isspace():
            # whitespace only separates tokens
            cur += 1
            continue

        if ch in "(),-:|&+!":
            # simple one-character operator
            yield (ch, None, cur)
            cur += 1
            continue

        israw = ch == 'r' and program[cur:cur + 2] in ("r'", 'r"')
        if ch in '"\'' or israw:
            # quoted string, optionally with an r prefix
            if israw:
                # step over the prefix; raw strings are not unescaped
                cur += 1
                quote = program[cur]
                decode = lambda x: x
            else:
                quote = ch
                decode = parser.unescapestr
            cur += 1
            start = cur
            while cur < end:
                here = program[cur]
                if here == '\\':
                    # a backslash hides the character that follows it
                    cur += 2
                elif here == quote:
                    yield ('string', decode(program[start:cur]), start)
                    break
                else:
                    cur += 1
            else:
                # ran off the end without meeting the closing quote
                raise error.ParseError(_("unterminated string"), start)
            # move past the closing quote
            cur += 1
            continue

        if issymchar(ch):
            # collect the whole symbol or keyword
            start = cur
            cur += 1
            while cur < end and issymchar(program[cur]):
                cur += 1
            sym = program[start:cur]
            if sym in keywords:
                yield (sym, None, start)
            else:
                yield ('symbol', sym, start)
            # cur already points at the first character after the symbol
            continue

        raise error.ParseError(_("syntax error"), cur)
    yield ('end', None, cur)
91
def parse(expr):
    """Parse the fileset expression ``expr`` and return its tree.

    The token stream from tokenize() is fed to a parser built from the
    ``elements`` table, and nested 'list' and 'or' infix nodes are then
    passed through parser.simplifyinfixops().

    Raises error.ParseError("invalid token") when the position returned
    by the parser differs from the length of ``expr``.
    """
    tree, consumed = parser.parser(elements).parse(tokenize(expr))
    if consumed == len(expr):
        return parser.simplifyinfixops(tree, {'list', 'or'})
    raise error.ParseError(_("invalid token"), consumed)
98
def getsymbol(x):
    """Return the second item of a ('symbol', ...) node.

    Raises error.ParseError if ``x`` is empty or is not tagged 'symbol'.
    """
    if not x or x[0] != 'symbol':
        raise error.ParseError(_('not a symbol'))
    return x[1]
103
def getstring(x, err):
    """Return the second item of a 'string' or 'symbol' node.

    Raises error.ParseError(err) if ``x`` is empty or carries any other
    tag.
    """
    if not x or x[0] not in ('string', 'symbol'):
        raise error.ParseError(err)
    return x[1]
108
def getkindpat(x, y, allkinds, err):
    """Return 'kind:pat' built from a kind node ``x`` and a pattern ``y``.

    ``x`` must be a symbol node and ``y`` a string or symbol node; they
    are checked in that order, so a bad ``x`` is reported first.

    Raises error.ParseError if either node has the wrong type (``err``
    is the message used for ``y``) or if the kind is not in ``allkinds``.
    """
    kind, pat = getsymbol(x), getstring(y, err)
    if kind in allkinds:
        return '%s:%s' % (kind, pat)
    raise error.ParseError(_("invalid pattern kind: %s") % kind)
115
def getpattern(x, allkinds, err):
    """Return the pattern text represented by node ``x``.

    A 'kindpat' node is rendered through getkindpat(); anything else is
    handed to getstring(), which raises error.ParseError(err) unless it
    is a string or symbol node.
    """
    if not x or x[0] != 'kindpat':
        return getstring(x, err)
    return getkindpat(x[1], x[2], allkinds, err)
120
def getlist(x):
    """Return the items of node ``x`` as a list.

    An empty or None node gives [], a 'list' node gives its children,
    and any other node is wrapped in a one-element list.
    """
    if not x:
        return []
    return list(x[1:]) if x[0] == 'list' else [x]
127
def getargs(x, min, max, err):
    """Return the items of ``x`` as a list, checking how many there are.

    Raises error.ParseError(err) unless the number of items lies between
    ``min`` and ``max`` inclusive.
    """
    args = getlist(x)
    if not (min <= len(args) <= max):
        raise error.ParseError(err)
    return args
133
def prettyformat(tree):
    """Format ``tree`` with parser.prettyformat().

    'string' and 'symbol' are passed as the second argument.
    """
    leafnodes = ('string', 'symbol')
    return parser.prettyformat(tree, leafnodes)