Mercurial > hg-stable
changeset 38845:b9162ea1b815
fileset: extract language processing part to new module (API)
I'll add a couple more functions that work on the parsed tree.
% wc -l mercurial/fileset*.py
559 mercurial/fileset.py
135 mercurial/filesetlang.py
694 total
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 22 Jul 2018 15:50:45 +0900 |
parents | d82c4d42b615 |
children | 5742d0428ed9 |
files | hgext/lfs/__init__.py mercurial/debugcommands.py mercurial/fileset.py mercurial/filesetlang.py mercurial/minifileset.py |
diffstat | 5 files changed, 155 insertions(+), 130 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/lfs/__init__.py Sat Jul 21 15:23:56 2018 +0900 +++ b/hgext/lfs/__init__.py Sun Jul 22 15:50:45 2018 +0900 @@ -136,7 +136,7 @@ exchange, extensions, filelog, - fileset, + filesetlang, hg, localrepo, minifileset, @@ -261,7 +261,7 @@ # deprecated config: lfs.threshold threshold = repo.ui.configbytes('lfs', 'threshold') if threshold: - fileset.parse(trackspec) # make sure syntax errors are confined + filesetlang.parse(trackspec) # make sure syntax errors are confined trackspec = "(%s) | size('>%d')" % (trackspec, threshold) return minifileset.compile(trackspec) @@ -361,7 +361,7 @@ def lfsfileset(mctx, x): """File that uses LFS storage.""" # i18n: "lfs" is a keyword - fileset.getargs(x, 0, 0, _("lfs takes no arguments")) + filesetlang.getargs(x, 0, 0, _("lfs takes no arguments")) ctx = mctx.ctx def lfsfilep(f): return wrapper.pointerfromctx(ctx, f, removed=True) is not None
--- a/mercurial/debugcommands.py Sat Jul 21 15:23:56 2018 +0900 +++ b/mercurial/debugcommands.py Sun Jul 22 15:50:45 2018 +0900 @@ -48,7 +48,7 @@ exchange, extensions, filemerge, - fileset, + filesetlang, formatter, hg, httppeer, @@ -916,13 +916,13 @@ raise error.Abort(_('invalid stage name: %s') % n) showalways.update(opts['show_stage']) - tree = fileset.parse(expr) + tree = filesetlang.parse(expr) for n, f in stages: tree = f(tree) if n in showalways: if opts['show_stage'] or n != 'parsed': ui.write(("* %s:\n") % n) - ui.write(fileset.prettyformat(tree), "\n") + ui.write(filesetlang.prettyformat(tree), "\n") files = set() if opts['all_files']:
--- a/mercurial/fileset.py Sat Jul 21 15:23:56 2018 +0900 +++ b/mercurial/fileset.py Sun Jul 22 15:50:45 2018 +0900 @@ -13,9 +13,9 @@ from .i18n import _ from . import ( error, + filesetlang, match as matchmod, merge, - parser, pycompat, registrar, scmutil, @@ -25,120 +25,12 @@ stringutil, ) -elements = { - # token-type: binding-strength, primary, prefix, infix, suffix - "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None), - ":": (15, None, None, ("kindpat", 15), None), - "-": (5, None, ("negate", 19), ("minus", 5), None), - "not": (10, None, ("not", 10), None, None), - "!": (10, None, ("not", 10), None, None), - "and": (5, None, None, ("and", 5), None), - "&": (5, None, None, ("and", 5), None), - "or": (4, None, None, ("or", 4), None), - "|": (4, None, None, ("or", 4), None), - "+": (4, None, None, ("or", 4), None), - ",": (2, None, None, ("list", 2), None), - ")": (0, None, None, None, None), - "symbol": (0, "symbol", None, None, None), - "string": (0, "string", None, None, None), - "end": (0, None, None, None, None), -} - -keywords = {'and', 'or', 'not'} - -globchars = ".*{}[]?/\\_" - -def tokenize(program): - pos, l = 0, len(program) - program = pycompat.bytestr(program) - while pos < l: - c = program[pos] - if c.isspace(): # skip inter-token whitespace - pass - elif c in "(),-:|&+!": # handle simple operators - yield (c, None, pos) - elif (c in '"\'' or c == 'r' and - program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings - if c == 'r': - pos += 1 - c = program[pos] - decode = lambda x: x - else: - decode = parser.unescapestr - pos += 1 - s = pos - while pos < l: # find closing quote - d = program[pos] - if d == '\\': # skip over escaped characters - pos += 2 - continue - if d == c: - yield ('string', decode(program[s:pos]), s) - break - pos += 1 - else: - raise error.ParseError(_("unterminated string"), s) - elif c.isalnum() or c in globchars or ord(c) > 127: - # gather up a symbol/keyword - s = pos - pos += 1 - while pos < l: # find end of 
symbol - d = program[pos] - if not (d.isalnum() or d in globchars or ord(d) > 127): - break - pos += 1 - sym = program[s:pos] - if sym in keywords: # operator keywords - yield (sym, None, s) - else: - yield ('symbol', sym, s) - pos -= 1 - else: - raise error.ParseError(_("syntax error"), pos) - pos += 1 - yield ('end', None, pos) - -def parse(expr): - p = parser.parser(elements) - tree, pos = p.parse(tokenize(expr)) - if pos != len(expr): - raise error.ParseError(_("invalid token"), pos) - return parser.simplifyinfixops(tree, {'list', 'or'}) - -def getsymbol(x): - if x and x[0] == 'symbol': - return x[1] - raise error.ParseError(_('not a symbol')) - -def getstring(x, err): - if x and (x[0] == 'string' or x[0] == 'symbol'): - return x[1] - raise error.ParseError(err) - -def _getkindpat(x, y, allkinds, err): - kind = getsymbol(x) - pat = getstring(y, err) - if kind not in allkinds: - raise error.ParseError(_("invalid pattern kind: %s") % kind) - return '%s:%s' % (kind, pat) - -def getpattern(x, allkinds, err): - if x and x[0] == 'kindpat': - return _getkindpat(x[1], x[2], allkinds, err) - return getstring(x, err) - -def getlist(x): - if not x: - return [] - if x[0] == 'list': - return list(x[1:]) - return [x] - -def getargs(x, min, max, err): - l = getlist(x) - if len(l) < min or len(l) > max: - raise error.ParseError(err) - return l +# helpers for processing parsed tree +getsymbol = filesetlang.getsymbol +getstring = filesetlang.getstring +_getkindpat = filesetlang.getkindpat +getpattern = filesetlang.getpattern +getargs = filesetlang.getargs def getmatch(mctx, x): if not x: @@ -192,7 +84,7 @@ # with: # mctx - current matchctx instance # x - argument in tree form -symbols = {} +symbols = filesetlang.symbols # filesets using matchctx.status() _statuscallers = set() @@ -635,7 +527,7 @@ def match(ctx, expr, badfn=None): """Create a matcher for a single fileset expression""" - tree = parse(expr) + tree = filesetlang.parse(expr) mctx = matchctx(ctx, _buildstatus(ctx, 
tree), badfn=badfn) return getmatch(mctx, tree) @@ -653,9 +545,6 @@ else: return None -def prettyformat(tree): - return parser.prettyformat(tree, ('string', 'symbol')) - def loadpredicate(ui, extname, registrarobj): """Load fileset predicates from specified registrarobj """
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mercurial/filesetlang.py Sun Jul 22 15:50:45 2018 +0900 @@ -0,0 +1,135 @@ +# filesetlang.py - parser, tokenizer and utility for file set language +# +# Copyright 2010 Matt Mackall <mpm@selenic.com> +# +# This software may be used and distributed according to the terms of the +# GNU General Public License version 2 or any later version. + +from __future__ import absolute_import + +from .i18n import _ +from . import ( + error, + parser, + pycompat, +) + +elements = { + # token-type: binding-strength, primary, prefix, infix, suffix + "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None), + ":": (15, None, None, ("kindpat", 15), None), + "-": (5, None, ("negate", 19), ("minus", 5), None), + "not": (10, None, ("not", 10), None, None), + "!": (10, None, ("not", 10), None, None), + "and": (5, None, None, ("and", 5), None), + "&": (5, None, None, ("and", 5), None), + "or": (4, None, None, ("or", 4), None), + "|": (4, None, None, ("or", 4), None), + "+": (4, None, None, ("or", 4), None), + ",": (2, None, None, ("list", 2), None), + ")": (0, None, None, None, None), + "symbol": (0, "symbol", None, None, None), + "string": (0, "string", None, None, None), + "end": (0, None, None, None, None), +} + +keywords = {'and', 'or', 'not'} + +symbols = {} + +globchars = ".*{}[]?/\\_" + +def tokenize(program): + pos, l = 0, len(program) + program = pycompat.bytestr(program) + while pos < l: + c = program[pos] + if c.isspace(): # skip inter-token whitespace + pass + elif c in "(),-:|&+!": # handle simple operators + yield (c, None, pos) + elif (c in '"\'' or c == 'r' and + program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings + if c == 'r': + pos += 1 + c = program[pos] + decode = lambda x: x + else: + decode = parser.unescapestr + pos += 1 + s = pos + while pos < l: # find closing quote + d = program[pos] + if d == '\\': # skip over escaped characters + pos += 2 + continue + if d == c: + yield ('string', 
decode(program[s:pos]), s) + break + pos += 1 + else: + raise error.ParseError(_("unterminated string"), s) + elif c.isalnum() or c in globchars or ord(c) > 127: + # gather up a symbol/keyword + s = pos + pos += 1 + while pos < l: # find end of symbol + d = program[pos] + if not (d.isalnum() or d in globchars or ord(d) > 127): + break + pos += 1 + sym = program[s:pos] + if sym in keywords: # operator keywords + yield (sym, None, s) + else: + yield ('symbol', sym, s) + pos -= 1 + else: + raise error.ParseError(_("syntax error"), pos) + pos += 1 + yield ('end', None, pos) + +def parse(expr): + p = parser.parser(elements) + tree, pos = p.parse(tokenize(expr)) + if pos != len(expr): + raise error.ParseError(_("invalid token"), pos) + return parser.simplifyinfixops(tree, {'list', 'or'}) + +def getsymbol(x): + if x and x[0] == 'symbol': + return x[1] + raise error.ParseError(_('not a symbol')) + +def getstring(x, err): + if x and (x[0] == 'string' or x[0] == 'symbol'): + return x[1] + raise error.ParseError(err) + +def getkindpat(x, y, allkinds, err): + kind = getsymbol(x) + pat = getstring(y, err) + if kind not in allkinds: + raise error.ParseError(_("invalid pattern kind: %s") % kind) + return '%s:%s' % (kind, pat) + +def getpattern(x, allkinds, err): + if x and x[0] == 'kindpat': + return getkindpat(x[1], x[2], allkinds, err) + return getstring(x, err) + +def getlist(x): + if not x: + return [] + if x[0] == 'list': + return list(x[1:]) + return [x] + +def getargs(x, min, max, err): + l = getlist(x) + if len(l) < min or len(l) > max: + raise error.ParseError(err) + return l + +def prettyformat(tree): + return parser.prettyformat(tree, ('string', 'symbol'))
--- a/mercurial/minifileset.py Sat Jul 21 15:23:56 2018 +0900 +++ b/mercurial/minifileset.py Sun Jul 22 15:50:45 2018 +0900 @@ -11,12 +11,13 @@ from . import ( error, fileset, + filesetlang, pycompat, ) def _sizep(x): # i18n: "size" is a keyword - expr = fileset.getstring(x, _("size requires an expression")) + expr = filesetlang.getstring(x, _("size requires an expression")) return fileset.sizematcher(expr) def _compile(tree): @@ -24,7 +25,7 @@ raise error.ParseError(_("missing argument")) op = tree[0] if op in {'symbol', 'string', 'kindpat'}: - name = fileset.getpattern(tree, {'path'}, _('invalid file pattern')) + name = filesetlang.getpattern(tree, {'path'}, _('invalid file pattern')) if name.startswith('**'): # file extension test, ex. "**.tar.gz" ext = name[2:] for c in pycompat.bytestr(ext): @@ -57,7 +58,7 @@ 'size': lambda n, s: _sizep(tree[2])(s), } - name = fileset.getsymbol(tree[1]) + name = filesetlang.getsymbol(tree[1]) if name in symbols: return symbols[name] @@ -87,5 +88,5 @@ files whose name ends with ".zip", and all files under "bin" in the repo root except for "bin/README". """ - tree = fileset.parse(text) + tree = filesetlang.parse(text) return _compile(tree)