changeset 38829:7e7e2b2ff284

fileset: add stub for weight-based optimization The main purpose of this change is to group basic patterns, which can be mapped to a plain matcher. I'm not so interested in a weight of each function.
author Yuya Nishihara <yuya@tcha.org>
date Sat, 21 Jul 2018 15:52:26 +0900
parents 3ea6ce609747
children bfd5def3fe02
files mercurial/debugcommands.py mercurial/fileset.py mercurial/filesetlang.py mercurial/minifileset.py mercurial/registrar.py tests/test-fileset.t
diffstat 6 files changed, 58 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/debugcommands.py	Sat Jul 21 16:16:44 2018 +0900
+++ b/mercurial/debugcommands.py	Sat Jul 21 15:52:26 2018 +0900
@@ -902,6 +902,7 @@
     stages = [
         ('parsed', pycompat.identity),
         ('analyzed', filesetlang.analyze),
+        ('optimized', filesetlang.optimize),
     ]
     stagenames = set(n for n, f in stages)
 
--- a/mercurial/fileset.py	Sat Jul 21 16:16:44 2018 +0900
+++ b/mercurial/fileset.py	Sat Jul 21 15:52:26 2018 +0900
@@ -524,6 +524,7 @@
     """Create a matcher for a single fileset expression"""
     tree = filesetlang.parse(expr)
     tree = filesetlang.analyze(tree)
+    tree = filesetlang.optimize(tree)
     mctx = matchctx(ctx, _buildstatus(ctx, tree), badfn=badfn)
     return getmatch(mctx, tree)
 
--- a/mercurial/filesetlang.py	Sat Jul 21 16:16:44 2018 +0900
+++ b/mercurial/filesetlang.py	Sat Jul 21 15:52:26 2018 +0900
@@ -164,12 +164,50 @@
 
 def analyze(x):
     """Transform raw parsed tree to evaluatable tree which can be fed to
-    getmatch()
+    optimize() or getmatch()
 
     All pseudo operations should be mapped to real operations or functions
     defined in methods or symbols table respectively.
     """
     return _analyze(x)
 
+def _optimize(x):
+    if x is None:
+        return 0, x
+
+    op = x[0]
+    if op in {'string', 'symbol'}:
+        return 0.5, x
+    if op == 'kindpat':
+        w, t = _optimize(x[2])
+        return w, (op, x[1], t)
+    if op == 'not':
+        w, t = _optimize(x[1])
+        return w, (op, t)
+    if op in {'and', 'minus'}:
+        wa, ta = _optimize(x[1])
+        wb, tb = _optimize(x[2])
+        return max(wa, wb), (op, ta, tb)
+    if op == 'or':
+        ws, ts = zip(*(_optimize(y) for y in x[1:]))
+        return max(ws), (op,) + ts
+    if op == 'list':
+        ws, ts = zip(*(_optimize(y) for y in x[1:]))
+        return sum(ws), (op,) + ts
+    if op == 'func':
+        f = getsymbol(x[1])
+        w = getattr(symbols.get(f), '_weight', 1)
+        wa, ta = _optimize(x[2])
+        return w + wa, (op, x[1], ta)
+    raise error.ProgrammingError('invalid operator %r' % op)
+
+def optimize(x):
+    """Reorder/rewrite evaluatable tree for optimization
+
+    All pseudo operations should be transformed beforehand.
+    """
+    _w, t = _optimize(x)
+    return t
+
 def prettyformat(tree):
     return parser.prettyformat(tree, ('string', 'symbol'))
--- a/mercurial/minifileset.py	Sat Jul 21 16:16:44 2018 +0900
+++ b/mercurial/minifileset.py	Sat Jul 21 15:52:26 2018 +0900
@@ -86,4 +86,5 @@
     """
     tree = filesetlang.parse(text)
     tree = filesetlang.analyze(tree)
+    tree = filesetlang.optimize(tree)
     return _compile(tree)
--- a/mercurial/registrar.py	Sat Jul 21 16:16:44 2018 +0900
+++ b/mercurial/registrar.py	Sat Jul 21 15:52:26 2018 +0900
@@ -247,6 +247,9 @@
      implies 'matchctx.status()' at runtime or not (False, by
      default).
 
+    Optional argument 'weight' indicates the estimated run-time cost, useful
+    for static optimization, default is 1. Higher weight means more expensive.
+
     'filesetpredicate' instance in example above can be used to
     decorate multiple functions.
 
@@ -259,8 +262,9 @@
     _getname = _funcregistrarbase._parsefuncdecl
     _docformat = "``%s``\n    %s"
 
-    def _extrasetup(self, name, func, callstatus=False):
+    def _extrasetup(self, name, func, callstatus=False, weight=1):
         func._callstatus = callstatus
+        func._weight = weight
 
 class _templateregistrarbase(_funcregistrarbase):
     """Base of decorator to register functions as template specific one
--- a/tests/test-fileset.t	Sat Jul 21 16:16:44 2018 +0900
+++ b/tests/test-fileset.t	Sat Jul 21 15:52:26 2018 +0900
@@ -180,6 +180,17 @@
       (func
         (symbol 'clean')
         None)))
+  * optimized:
+  (or
+    (symbol 'a1')
+    (symbol 'a2')
+    (and
+      (func
+        (symbol 'grep')
+        (string 'b'))
+      (func
+        (symbol 'clean')
+        None)))
   * matcher:
   <unionmatcher matchers=[
     <patternmatcher patterns='(?:a1$)'>,