changeset 38845:b9162ea1b815

fileset: extract language processing part to new module (API) I'll add a couple more functions that work on parsed tree. % wc -l mercurial/fileset*.py 559 mercurial/fileset.py 135 mercurial/filesetlang.py 694 total
author Yuya Nishihara <yuya@tcha.org>
date Sun, 22 Jul 2018 15:50:45 +0900
parents d82c4d42b615
children 5742d0428ed9
files hgext/lfs/__init__.py mercurial/debugcommands.py mercurial/fileset.py mercurial/filesetlang.py mercurial/minifileset.py
diffstat 5 files changed, 155 insertions(+), 130 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/lfs/__init__.py	Sat Jul 21 15:23:56 2018 +0900
+++ b/hgext/lfs/__init__.py	Sun Jul 22 15:50:45 2018 +0900
@@ -136,7 +136,7 @@
     exchange,
     extensions,
     filelog,
-    fileset,
+    filesetlang,
     hg,
     localrepo,
     minifileset,
@@ -261,7 +261,7 @@
         # deprecated config: lfs.threshold
         threshold = repo.ui.configbytes('lfs', 'threshold')
         if threshold:
-            fileset.parse(trackspec)  # make sure syntax errors are confined
+            filesetlang.parse(trackspec)  # make sure syntax errors are confined
             trackspec = "(%s) | size('>%d')" % (trackspec, threshold)
 
         return minifileset.compile(trackspec)
@@ -361,7 +361,7 @@
 def lfsfileset(mctx, x):
     """File that uses LFS storage."""
     # i18n: "lfs" is a keyword
-    fileset.getargs(x, 0, 0, _("lfs takes no arguments"))
+    filesetlang.getargs(x, 0, 0, _("lfs takes no arguments"))
     ctx = mctx.ctx
     def lfsfilep(f):
         return wrapper.pointerfromctx(ctx, f, removed=True) is not None
--- a/mercurial/debugcommands.py	Sat Jul 21 15:23:56 2018 +0900
+++ b/mercurial/debugcommands.py	Sun Jul 22 15:50:45 2018 +0900
@@ -48,7 +48,7 @@
     exchange,
     extensions,
     filemerge,
-    fileset,
+    filesetlang,
     formatter,
     hg,
     httppeer,
@@ -916,13 +916,13 @@
                 raise error.Abort(_('invalid stage name: %s') % n)
         showalways.update(opts['show_stage'])
 
-    tree = fileset.parse(expr)
+    tree = filesetlang.parse(expr)
     for n, f in stages:
         tree = f(tree)
         if n in showalways:
             if opts['show_stage'] or n != 'parsed':
                 ui.write(("* %s:\n") % n)
-            ui.write(fileset.prettyformat(tree), "\n")
+            ui.write(filesetlang.prettyformat(tree), "\n")
 
     files = set()
     if opts['all_files']:
--- a/mercurial/fileset.py	Sat Jul 21 15:23:56 2018 +0900
+++ b/mercurial/fileset.py	Sun Jul 22 15:50:45 2018 +0900
@@ -13,9 +13,9 @@
 from .i18n import _
 from . import (
     error,
+    filesetlang,
     match as matchmod,
     merge,
-    parser,
     pycompat,
     registrar,
     scmutil,
@@ -25,120 +25,12 @@
     stringutil,
 )
 
-elements = {
-    # token-type: binding-strength, primary, prefix, infix, suffix
-    "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
-    ":": (15, None, None, ("kindpat", 15), None),
-    "-": (5, None, ("negate", 19), ("minus", 5), None),
-    "not": (10, None, ("not", 10), None, None),
-    "!": (10, None, ("not", 10), None, None),
-    "and": (5, None, None, ("and", 5), None),
-    "&": (5, None, None, ("and", 5), None),
-    "or": (4, None, None, ("or", 4), None),
-    "|": (4, None, None, ("or", 4), None),
-    "+": (4, None, None, ("or", 4), None),
-    ",": (2, None, None, ("list", 2), None),
-    ")": (0, None, None, None, None),
-    "symbol": (0, "symbol", None, None, None),
-    "string": (0, "string", None, None, None),
-    "end": (0, None, None, None, None),
-}
-
-keywords = {'and', 'or', 'not'}
-
-globchars = ".*{}[]?/\\_"
-
-def tokenize(program):
-    pos, l = 0, len(program)
-    program = pycompat.bytestr(program)
-    while pos < l:
-        c = program[pos]
-        if c.isspace(): # skip inter-token whitespace
-            pass
-        elif c in "(),-:|&+!": # handle simple operators
-            yield (c, None, pos)
-        elif (c in '"\'' or c == 'r' and
-              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
-            if c == 'r':
-                pos += 1
-                c = program[pos]
-                decode = lambda x: x
-            else:
-                decode = parser.unescapestr
-            pos += 1
-            s = pos
-            while pos < l: # find closing quote
-                d = program[pos]
-                if d == '\\': # skip over escaped characters
-                    pos += 2
-                    continue
-                if d == c:
-                    yield ('string', decode(program[s:pos]), s)
-                    break
-                pos += 1
-            else:
-                raise error.ParseError(_("unterminated string"), s)
-        elif c.isalnum() or c in globchars or ord(c) > 127:
-            # gather up a symbol/keyword
-            s = pos
-            pos += 1
-            while pos < l: # find end of symbol
-                d = program[pos]
-                if not (d.isalnum() or d in globchars or ord(d) > 127):
-                    break
-                pos += 1
-            sym = program[s:pos]
-            if sym in keywords: # operator keywords
-                yield (sym, None, s)
-            else:
-                yield ('symbol', sym, s)
-            pos -= 1
-        else:
-            raise error.ParseError(_("syntax error"), pos)
-        pos += 1
-    yield ('end', None, pos)
-
-def parse(expr):
-    p = parser.parser(elements)
-    tree, pos = p.parse(tokenize(expr))
-    if pos != len(expr):
-        raise error.ParseError(_("invalid token"), pos)
-    return parser.simplifyinfixops(tree, {'list', 'or'})
-
-def getsymbol(x):
-    if x and x[0] == 'symbol':
-        return x[1]
-    raise error.ParseError(_('not a symbol'))
-
-def getstring(x, err):
-    if x and (x[0] == 'string' or x[0] == 'symbol'):
-        return x[1]
-    raise error.ParseError(err)
-
-def _getkindpat(x, y, allkinds, err):
-    kind = getsymbol(x)
-    pat = getstring(y, err)
-    if kind not in allkinds:
-        raise error.ParseError(_("invalid pattern kind: %s") % kind)
-    return '%s:%s' % (kind, pat)
-
-def getpattern(x, allkinds, err):
-    if x and x[0] == 'kindpat':
-        return _getkindpat(x[1], x[2], allkinds, err)
-    return getstring(x, err)
-
-def getlist(x):
-    if not x:
-        return []
-    if x[0] == 'list':
-        return list(x[1:])
-    return [x]
-
-def getargs(x, min, max, err):
-    l = getlist(x)
-    if len(l) < min or len(l) > max:
-        raise error.ParseError(err)
-    return l
+# helpers for processing parsed tree
+getsymbol = filesetlang.getsymbol
+getstring = filesetlang.getstring
+_getkindpat = filesetlang.getkindpat
+getpattern = filesetlang.getpattern
+getargs = filesetlang.getargs
 
 def getmatch(mctx, x):
     if not x:
@@ -192,7 +84,7 @@
 # with:
 #  mctx - current matchctx instance
 #  x - argument in tree form
-symbols = {}
+symbols = filesetlang.symbols
 
 # filesets using matchctx.status()
 _statuscallers = set()
@@ -635,7 +527,7 @@
 
 def match(ctx, expr, badfn=None):
     """Create a matcher for a single fileset expression"""
-    tree = parse(expr)
+    tree = filesetlang.parse(expr)
     mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn)
     return getmatch(mctx, tree)
 
@@ -653,9 +545,6 @@
     else:
         return None
 
-def prettyformat(tree):
-    return parser.prettyformat(tree, ('string', 'symbol'))
-
 def loadpredicate(ui, extname, registrarobj):
     """Load fileset predicates from specified registrarobj
     """
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/filesetlang.py	Sun Jul 22 15:50:45 2018 +0900
@@ -0,0 +1,135 @@
+# filesetlang.py - parser, tokenizer and utility for file set language
+#
+# Copyright 2010 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+from .i18n import _
+from . import (
+    error,
+    parser,
+    pycompat,
+)
+
+elements = {
+    # token-type: binding-strength, primary, prefix, infix, suffix
+    "(": (20, None, ("group", 1, ")"), ("func", 1, ")"), None),
+    ":": (15, None, None, ("kindpat", 15), None),
+    "-": (5, None, ("negate", 19), ("minus", 5), None),
+    "not": (10, None, ("not", 10), None, None),
+    "!": (10, None, ("not", 10), None, None),
+    "and": (5, None, None, ("and", 5), None),
+    "&": (5, None, None, ("and", 5), None),
+    "or": (4, None, None, ("or", 4), None),
+    "|": (4, None, None, ("or", 4), None),
+    "+": (4, None, None, ("or", 4), None),
+    ",": (2, None, None, ("list", 2), None),
+    ")": (0, None, None, None, None),
+    "symbol": (0, "symbol", None, None, None),
+    "string": (0, "string", None, None, None),
+    "end": (0, None, None, None, None),
+}
+
+keywords = {'and', 'or', 'not'}
+
+symbols = {}
+
+globchars = ".*{}[]?/\\_"
+
+def tokenize(program):
+    pos, l = 0, len(program)
+    program = pycompat.bytestr(program)
+    while pos < l:
+        c = program[pos]
+        if c.isspace(): # skip inter-token whitespace
+            pass
+        elif c in "(),-:|&+!": # handle simple operators
+            yield (c, None, pos)
+        elif (c in '"\'' or c == 'r' and
+              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
+            if c == 'r':
+                pos += 1
+                c = program[pos]
+                decode = lambda x: x
+            else:
+                decode = parser.unescapestr
+            pos += 1
+            s = pos
+            while pos < l: # find closing quote
+                d = program[pos]
+                if d == '\\': # skip over escaped characters
+                    pos += 2
+                    continue
+                if d == c:
+                    yield ('string', decode(program[s:pos]), s)
+                    break
+                pos += 1
+            else:
+                raise error.ParseError(_("unterminated string"), s)
+        elif c.isalnum() or c in globchars or ord(c) > 127:
+            # gather up a symbol/keyword
+            s = pos
+            pos += 1
+            while pos < l: # find end of symbol
+                d = program[pos]
+                if not (d.isalnum() or d in globchars or ord(d) > 127):
+                    break
+                pos += 1
+            sym = program[s:pos]
+            if sym in keywords: # operator keywords
+                yield (sym, None, s)
+            else:
+                yield ('symbol', sym, s)
+            pos -= 1
+        else:
+            raise error.ParseError(_("syntax error"), pos)
+        pos += 1
+    yield ('end', None, pos)
+
+def parse(expr):
+    p = parser.parser(elements)
+    tree, pos = p.parse(tokenize(expr))
+    if pos != len(expr):
+        raise error.ParseError(_("invalid token"), pos)
+    return parser.simplifyinfixops(tree, {'list', 'or'})
+
+def getsymbol(x):
+    if x and x[0] == 'symbol':
+        return x[1]
+    raise error.ParseError(_('not a symbol'))
+
+def getstring(x, err):
+    if x and (x[0] == 'string' or x[0] == 'symbol'):
+        return x[1]
+    raise error.ParseError(err)
+
+def getkindpat(x, y, allkinds, err):
+    kind = getsymbol(x)
+    pat = getstring(y, err)
+    if kind not in allkinds:
+        raise error.ParseError(_("invalid pattern kind: %s") % kind)
+    return '%s:%s' % (kind, pat)
+
+def getpattern(x, allkinds, err):
+    if x and x[0] == 'kindpat':
+        return getkindpat(x[1], x[2], allkinds, err)
+    return getstring(x, err)
+
+def getlist(x):
+    if not x:
+        return []
+    if x[0] == 'list':
+        return list(x[1:])
+    return [x]
+
+def getargs(x, min, max, err):
+    l = getlist(x)
+    if len(l) < min or len(l) > max:
+        raise error.ParseError(err)
+    return l
+
+def prettyformat(tree):
+    return parser.prettyformat(tree, ('string', 'symbol'))
--- a/mercurial/minifileset.py	Sat Jul 21 15:23:56 2018 +0900
+++ b/mercurial/minifileset.py	Sun Jul 22 15:50:45 2018 +0900
@@ -11,12 +11,13 @@
 from . import (
     error,
     fileset,
+    filesetlang,
     pycompat,
 )
 
 def _sizep(x):
     # i18n: "size" is a keyword
-    expr = fileset.getstring(x, _("size requires an expression"))
+    expr = filesetlang.getstring(x, _("size requires an expression"))
     return fileset.sizematcher(expr)
 
 def _compile(tree):
@@ -24,7 +25,7 @@
         raise error.ParseError(_("missing argument"))
     op = tree[0]
     if op in {'symbol', 'string', 'kindpat'}:
-        name = fileset.getpattern(tree, {'path'}, _('invalid file pattern'))
+        name = filesetlang.getpattern(tree, {'path'}, _('invalid file pattern'))
         if name.startswith('**'): # file extension test, ex. "**.tar.gz"
             ext = name[2:]
             for c in pycompat.bytestr(ext):
@@ -57,7 +58,7 @@
             'size': lambda n, s: _sizep(tree[2])(s),
         }
 
-        name = fileset.getsymbol(tree[1])
+        name = filesetlang.getsymbol(tree[1])
         if name in symbols:
             return symbols[name]
 
@@ -87,5 +88,5 @@
     files whose name ends with ".zip", and all files under "bin" in the repo
     root except for "bin/README".
     """
-    tree = fileset.parse(text)
+    tree = filesetlang.parse(text)
     return _compile(tree)