fileset: extract language processing part to new module (API)
I'll add a couple more functions that work on the parsed tree.
% wc -l mercurial/fileset*.py
559 mercurial/fileset.py
135 mercurial/filesetlang.py
694 total
--- a/hgext/lfs/__init__.py Sat Jul 21 15:23:56 2018 +0900
+++ b/hgext/lfs/__init__.py Sun Jul 22 15:50:45 2018 +0900
@@ -136,7 +136,7 @@
exchange,
extensions,
filelog,
- fileset,
+ filesetlang,
hg,
localrepo,
minifileset,
@@ -261,7 +261,7 @@
# deprecated config: lfs.threshold
threshold = repo.ui.configbytes('lfs', 'threshold')
if threshold:
- fileset.parse(trackspec) # make sure syntax errors are confined
+ filesetlang.parse(trackspec) # make sure syntax errors are confined
trackspec = "(%s) | size('>%d')" % (trackspec, threshold)
return minifileset.compile(trackspec)
@@ -361,7 +361,7 @@
def lfsfileset(mctx, x):
"""File that uses LFS storage."""
# i18n: "lfs" is a keyword
- fileset.getargs(x, 0, 0, _("lfs takes no arguments"))
+ filesetlang.getargs(x, 0, 0, _("lfs takes no arguments"))
ctx = mctx.ctx
def lfsfilep(f):
return wrapper.pointerfromctx(ctx, f, removed=True) is not None
--- a/mercurial/debugcommands.py Sat Jul 21 15:23:56 2018 +0900
+++ b/mercurial/debugcommands.py Sun Jul 22 15:50:45 2018 +0900
@@ -48,7 +48,7 @@
exchange,
extensions,
filemerge,
- fileset,
+ filesetlang,
formatter,
hg,
httppeer,
@@ -916,13 +916,13 @@
raise error.Abort(_('invalid stage name: %s') % n)
showalways.update(opts['show_stage'])
- tree = fileset.parse(expr)
+ tree = filesetlang.parse(expr)
for n, f in stages:
tree = f(tree)
if n in showalways:
if opts['show_stage'] or n != 'parsed':
ui.write(("* %s:\n") % n)
- ui.write(fileset.prettyformat(tree), "\n")
+ ui.write(filesetlang.prettyformat(tree), "\n")
files = set()
if opts['all_files']:
--- a/mercurial/fileset.py Sat Jul 21 15:23:56 2018 +0900
+++ b/mercurial/fileset.py Sun Jul 22 15:50:45 2018 +0900
@@ -13,9 +13,9 @@
from .i18n import _
from . import (
error,
+ filesetlang,
match as matchmod,
merge,
- parser,
pycompat,
registrar,
scmutil,
@@ -25,120 +25,12 @@
stringutil,
)
-elements = {
- # token-type: binding-strength, primary, prefix, infix, suffix
- "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
- ":": (15, None, None, ("kindpat", 15), None),
- "-": (5, None, ("negate", 19), ("minus", 5), None),
- "not": (10, None, ("not", 10), None, None),
- "!": (10, None, ("not", 10), None, None),
- "and": (5, None, None, ("and", 5), None),
- "&": (5, None, None, ("and", 5), None),
- "or": (4, None, None, ("or", 4), None),
- "|": (4, None, None, ("or", 4), None),
- "+": (4, None, None, ("or", 4), None),
- ",": (2, None, None, ("list", 2), None),
- ")": (0, None, None, None, None),
- "symbol": (0, "symbol", None, None, None),
- "string": (0, "string", None, None, None),
- "end": (0, None, None, None, None),
-}
-
-keywords = {'and', 'or', 'not'}
-
-globchars = ".*{}[]?/\\_"
-
-def tokenize(program):
- pos, l = 0, len(program)
- program = pycompat.bytestr(program)
- while pos < l:
- c = program[pos]
- if c.isspace(): # skip inter-token whitespace
- pass
- elif c in "(),-:|&+!": # handle simple operators
- yield (c, None, pos)
- elif (c in '"\'' or c == 'r' and
- program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
- if c == 'r':
- pos += 1
- c = program[pos]
- decode = lambda x: x
- else:
- decode = parser.unescapestr
- pos += 1
- s = pos
- while pos < l: # find closing quote
- d = program[pos]
- if d == '\\': # skip over escaped characters
- pos += 2
- continue
- if d == c:
- yield ('string', decode(program[s:pos]), s)
- break
- pos += 1
- else:
- raise error.ParseError(_("unterminated string"), s)
- elif c.isalnum() or c in globchars or ord(c) > 127:
- # gather up a symbol/keyword
- s = pos
- pos += 1
- while pos < l: # find end of symbol
- d = program[pos]
- if not (d.isalnum() or d in globchars or ord(d) > 127):
- break
- pos += 1
- sym = program[s:pos]
- if sym in keywords: # operator keywords
- yield (sym, None, s)
- else:
- yield ('symbol', sym, s)
- pos -= 1
- else:
- raise error.ParseError(_("syntax error"), pos)
- pos += 1
- yield ('end', None, pos)
-
-def parse(expr):
- p = parser.parser(elements)
- tree, pos = p.parse(tokenize(expr))
- if pos != len(expr):
- raise error.ParseError(_("invalid token"), pos)
- return parser.simplifyinfixops(tree, {'list', 'or'})
-
-def getsymbol(x):
- if x and x[0] == 'symbol':
- return x[1]
- raise error.ParseError(_('not a symbol'))
-
-def getstring(x, err):
- if x and (x[0] == 'string' or x[0] == 'symbol'):
- return x[1]
- raise error.ParseError(err)
-
-def _getkindpat(x, y, allkinds, err):
- kind = getsymbol(x)
- pat = getstring(y, err)
- if kind not in allkinds:
- raise error.ParseError(_("invalid pattern kind: %s") % kind)
- return '%s:%s' % (kind, pat)
-
-def getpattern(x, allkinds, err):
- if x and x[0] == 'kindpat':
- return _getkindpat(x[1], x[2], allkinds, err)
- return getstring(x, err)
-
-def getlist(x):
- if not x:
- return []
- if x[0] == 'list':
- return list(x[1:])
- return [x]
-
-def getargs(x, min, max, err):
- l = getlist(x)
- if len(l) < min or len(l) > max:
- raise error.ParseError(err)
- return l
+# helpers for processing parsed tree
+getsymbol = filesetlang.getsymbol
+getstring = filesetlang.getstring
+_getkindpat = filesetlang.getkindpat
+getpattern = filesetlang.getpattern
+getargs = filesetlang.getargs
def getmatch(mctx, x):
if not x:
@@ -192,7 +84,7 @@
# with:
# mctx - current matchctx instance
# x - argument in tree form
-symbols = {}
+symbols = filesetlang.symbols
# filesets using matchctx.status()
_statuscallers = set()
@@ -635,7 +527,7 @@
def match(ctx, expr, badfn=None):
"""Create a matcher for a single fileset expression"""
- tree = parse(expr)
+ tree = filesetlang.parse(expr)
mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn)
return getmatch(mctx, tree)
@@ -653,9 +545,6 @@
else:
return None
-def prettyformat(tree):
- return parser.prettyformat(tree, ('string', 'symbol'))
-
def loadpredicate(ui, extname, registrarobj):
"""Load fileset predicates from specified registrarobj
"""
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/filesetlang.py Sun Jul 22 15:50:45 2018 +0900
@@ -0,0 +1,135 @@
+# filesetlang.py - parser, tokenizer and utility for file set language
+#
+# Copyright 2010 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from .i18n import _
+from . import (
+ error,
+ parser,
+ pycompat,
+)
+
+elements = {
+ # token-type: binding-strength, primary, prefix, infix, suffix
+ "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
+ ":": (15, None, None, ("kindpat", 15), None),
+ "-": (5, None, ("negate", 19), ("minus", 5), None),
+ "not": (10, None, ("not", 10), None, None),
+ "!": (10, None, ("not", 10), None, None),
+ "and": (5, None, None, ("and", 5), None),
+ "&": (5, None, None, ("and", 5), None),
+ "or": (4, None, None, ("or", 4), None),
+ "|": (4, None, None, ("or", 4), None),
+ "+": (4, None, None, ("or", 4), None),
+ ",": (2, None, None, ("list", 2), None),
+ ")": (0, None, None, None, None),
+ "symbol": (0, "symbol", None, None, None),
+ "string": (0, "string", None, None, None),
+ "end": (0, None, None, None, None),
+}
+
+keywords = {'and', 'or', 'not'}
+
+symbols = {}
+
+globchars = ".*{}[]?/\\_"
+
+def tokenize(program):
+ pos, l = 0, len(program)
+ program = pycompat.bytestr(program)
+ while pos < l:
+ c = program[pos]
+ if c.isspace(): # skip inter-token whitespace
+ pass
+ elif c in "(),-:|&+!": # handle simple operators
+ yield (c, None, pos)
+ elif (c in '"\'' or c == 'r' and
+ program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
+ if c == 'r':
+ pos += 1
+ c = program[pos]
+ decode = lambda x: x
+ else:
+ decode = parser.unescapestr
+ pos += 1
+ s = pos
+ while pos < l: # find closing quote
+ d = program[pos]
+ if d == '\\': # skip over escaped characters
+ pos += 2
+ continue
+ if d == c:
+ yield ('string', decode(program[s:pos]), s)
+ break
+ pos += 1
+ else:
+ raise error.ParseError(_("unterminated string"), s)
+ elif c.isalnum() or c in globchars or ord(c) > 127:
+ # gather up a symbol/keyword
+ s = pos
+ pos += 1
+ while pos < l: # find end of symbol
+ d = program[pos]
+ if not (d.isalnum() or d in globchars or ord(d) > 127):
+ break
+ pos += 1
+ sym = program[s:pos]
+ if sym in keywords: # operator keywords
+ yield (sym, None, s)
+ else:
+ yield ('symbol', sym, s)
+ pos -= 1
+ else:
+ raise error.ParseError(_("syntax error"), pos)
+ pos += 1
+ yield ('end', None, pos)
+
+def parse(expr):
+ p = parser.parser(elements)
+ tree, pos = p.parse(tokenize(expr))
+ if pos != len(expr):
+ raise error.ParseError(_("invalid token"), pos)
+ return parser.simplifyinfixops(tree, {'list', 'or'})
+
+def getsymbol(x):
+ if x and x[0] == 'symbol':
+ return x[1]
+ raise error.ParseError(_('not a symbol'))
+
+def getstring(x, err):
+ if x and (x[0] == 'string' or x[0] == 'symbol'):
+ return x[1]
+ raise error.ParseError(err)
+
+def getkindpat(x, y, allkinds, err):
+ kind = getsymbol(x)
+ pat = getstring(y, err)
+ if kind not in allkinds:
+ raise error.ParseError(_("invalid pattern kind: %s") % kind)
+ return '%s:%s' % (kind, pat)
+
+def getpattern(x, allkinds, err):
+ if x and x[0] == 'kindpat':
+ return getkindpat(x[1], x[2], allkinds, err)
+ return getstring(x, err)
+
+def getlist(x):
+ if not x:
+ return []
+ if x[0] == 'list':
+ return list(x[1:])
+ return [x]
+
+def getargs(x, min, max, err):
+ l = getlist(x)
+ if len(l) < min or len(l) > max:
+ raise error.ParseError(err)
+ return l
+
+def prettyformat(tree):
+ return parser.prettyformat(tree, ('string', 'symbol'))
--- a/mercurial/minifileset.py Sat Jul 21 15:23:56 2018 +0900
+++ b/mercurial/minifileset.py Sun Jul 22 15:50:45 2018 +0900
@@ -11,12 +11,13 @@
from . import (
error,
fileset,
+ filesetlang,
pycompat,
)
def _sizep(x):
# i18n: "size" is a keyword
- expr = fileset.getstring(x, _("size requires an expression"))
+ expr = filesetlang.getstring(x, _("size requires an expression"))
return fileset.sizematcher(expr)
def _compile(tree):
@@ -24,7 +25,7 @@
raise error.ParseError(_("missing argument"))
op = tree[0]
if op in {'symbol', 'string', 'kindpat'}:
- name = fileset.getpattern(tree, {'path'}, _('invalid file pattern'))
+ name = filesetlang.getpattern(tree, {'path'}, _('invalid file pattern'))
if name.startswith('**'): # file extension test, ex. "**.tar.gz"
ext = name[2:]
for c in pycompat.bytestr(ext):
@@ -57,7 +58,7 @@
'size': lambda n, s: _sizep(tree[2])(s),
}
- name = fileset.getsymbol(tree[1])
+ name = filesetlang.getsymbol(tree[1])
if name in symbols:
return symbols[name]
@@ -87,5 +88,5 @@
files whose name ends with ".zip", and all files under "bin" in the repo
root except for "bin/README".
"""
- tree = fileset.parse(text)
+ tree = filesetlang.parse(text)
return _compile(tree)