revset: introduce an API that avoids `formatspec` input serialization
authorBoris Feld <boris.feld@octobus.net>
Fri, 04 Jan 2019 13:41:21 +0100
changeset 41222 8aca89a694d4
parent 41221 73203cdfe3fe
child 41223 32ef47b3c91c
revset: introduce an API that avoids `formatspec` input serialization. Instead of having the data fully serialized, the input can be directly inserted into the tree at a later stage. Just using it for the simple "%ld" case provides a significant boost. For example, here is the impact on a sample discovery run between two pypy repositories with arbitrary differences (using hg perfdiscovery). $ hg perfdiscovery before: ! wall 0.700435 comb 0.710000 user 0.700000 sys 0.010000 (median of 15) after: ! wall 0.501305 comb 0.510000 user 0.490000 sys 0.020000 (median of 20)
mercurial/localrepo.py
mercurial/revset.py
mercurial/revsetlang.py
--- a/mercurial/localrepo.py	Fri Jan 04 05:26:13 2019 +0100
+++ b/mercurial/localrepo.py	Fri Jan 04 13:41:21 2019 +0100
@@ -1362,9 +1362,8 @@
         Returns a revset.abstractsmartset, which is a list-like interface
         that contains integer revisions.
         '''
-        expr = revsetlang.formatspec(expr, *args)
-        m = revset.match(None, expr)
-        return m(self)
+        tree = revsetlang.spectree(expr, *args)
+        return revset.makematcher(tree)(self)
 
     def set(self, expr, *args):
         '''Find revisions matching a revset and emit changectx instances.
--- a/mercurial/revset.py	Fri Jan 04 05:26:13 2019 +0100
+++ b/mercurial/revset.py	Fri Jan 04 13:41:21 2019 +0100
@@ -125,6 +125,13 @@
         return baseset([x])
     return baseset()
 
+def rawsmartset(repo, subset, x, order):
+    """argument is already a smartset, use that directly"""
+    if order == followorder:
+        return subset & x
+    else:
+        return x & subset
+
 def rangeset(repo, subset, x, y, order):
     m = getset(repo, fullreposet(repo), x)
     n = getset(repo, fullreposet(repo), y)
@@ -2216,6 +2223,7 @@
     "ancestor": ancestorspec,
     "parent": parentspec,
     "parentpost": parentpost,
+    "smartset": rawsmartset,
 }
 
 subscriptrelations = {
--- a/mercurial/revsetlang.py	Fri Jan 04 05:26:13 2019 +0100
+++ b/mercurial/revsetlang.py	Fri Jan 04 13:41:21 2019 +0100
@@ -333,7 +333,7 @@
     elif op == 'negate':
         s = getstring(x[1], _("can't negate that"))
         return _analyze(('string', '-' + s))
-    elif op in ('string', 'symbol'):
+    elif op in ('string', 'symbol', 'smartset'):
         return x
     elif op == 'rangeall':
         return (op, None)
@@ -373,7 +373,7 @@
         return 0, x
 
     op = x[0]
-    if op in ('string', 'symbol'):
+    if op in ('string', 'symbol', 'smartset'):
         return 0.5, x # single revisions are small
     elif op == 'and':
         wa, ta = _optimize(x[1])
@@ -535,7 +535,8 @@
 def foldconcat(tree):
     """Fold elements to be concatenated by `##`
     """
-    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
+    if (not isinstance(tree, tuple)
+        or tree[0] in ('string', 'symbol', 'smartset')):
         return tree
     if tree[0] == '_concat':
         pending = [tree]
@@ -691,6 +692,28 @@
             raise error.ProgrammingError("unknown revspec item type: %r" % t)
     return b''.join(ret)
 
+def spectree(expr, *args):
+    """similar to formatspec but return a parsed and optimized tree"""
+    parsed = _parseargs(expr, args)
+    ret = []
+    inputs = []
+    for t, arg in parsed:
+        if t is None:
+            ret.append(arg)
+        elif t == 'baseset':
+            newtree = ('smartset', smartset.baseset(arg))
+            inputs.append(newtree)
+            ret.append("$")
+        else:
+            raise error.ProgrammingError("unknown revspec item type: %r" % t)
+    expr = b''.join(ret)
+    tree = _parsewith(expr, syminitletters=_aliassyminitletters)
+    tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
+    tree = foldconcat(tree)
+    tree = analyze(tree)
+    tree = optimize(tree)
+    return tree
+
 def _parseargs(expr, args):
     """parse the expression and replace all inexpensive args