revset: split language services to revsetlang module (API)
authorYuya Nishihara <yuya@tcha.org>
Sun, 19 Feb 2017 18:19:33 +0900
changeset 31024 0b8356705de6
parent 31023 aea06029919e
child 31025 6cf2857526c7
revset: split language services to revsetlang module (API) New revsetlang module hosts parser, tokenizer, and miscellaneous functions working on parsed tree. It does not include functions for evaluation such as getset() and match(). 2288 mercurial/revset.py 684 mercurial/revsetlang.py 2972 total get*() functions are aliased since they are common in revset.py.
hgext/mq.py
mercurial/commands.py
mercurial/debugcommands.py
mercurial/hgweb/webcommands.py
mercurial/localrepo.py
mercurial/revset.py
mercurial/revsetlang.py
mercurial/scmutil.py
mercurial/templater.py
tests/test-doctest.py
tests/test-glog.t
tests/test-revset.t
--- a/hgext/mq.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/hgext/mq.py	Sun Feb 19 18:19:33 2017 +0900
@@ -89,7 +89,7 @@
     phases,
     pycompat,
     registrar,
-    revset,
+    revsetlang,
     scmutil,
     smartset,
     subrepo,
@@ -3568,7 +3568,7 @@
 def revsetmq(repo, subset, x):
     """Changesets managed by MQ.
     """
-    revset.getargs(x, 0, 0, _("mq takes no arguments"))
+    revsetlang.getargs(x, 0, 0, _("mq takes no arguments"))
     applied = set([repo[r.node].rev() for r in repo.mq.applied])
     return smartset.baseset([r for r in subset if r in applied])
 
--- a/mercurial/commands.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/mercurial/commands.py	Sun Feb 19 18:19:33 2017 +0900
@@ -44,7 +44,7 @@
     patch,
     phases,
     pycompat,
-    revset,
+    revsetlang,
     scmutil,
     server,
     sshserver,
@@ -3414,7 +3414,7 @@
 
     """
     if opts.get('follow') and opts.get('rev'):
-        opts['rev'] = [revset.formatspec('reverse(::%lr)', opts.get('rev'))]
+        opts['rev'] = [revsetlang.formatspec('reverse(::%lr)', opts.get('rev'))]
         del opts['follow']
 
     if opts.get('graph'):
@@ -4093,7 +4093,7 @@
     elif path.pushrev:
         # It doesn't make any sense to specify ancestor revisions. So limit
         # to DAG heads to make discovery simpler.
-        expr = revset.formatspec('heads(%r)', path.pushrev)
+        expr = revsetlang.formatspec('heads(%r)', path.pushrev)
         revs = scmutil.revrange(repo, [expr])
         revs = [repo[rev].node() for rev in revs]
         if not revs:
--- a/mercurial/debugcommands.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/mercurial/debugcommands.py	Sun Feb 19 18:19:33 2017 +0900
@@ -52,6 +52,7 @@
     repair,
     revlog,
     revset,
+    revsetlang,
     scmutil,
     setdiscovery,
     simplemerge,
@@ -1794,10 +1795,10 @@
     """
     stages = [
         ('parsed', lambda tree: tree),
-        ('expanded', lambda tree: revset.expandaliases(ui, tree)),
-        ('concatenated', revset.foldconcat),
-        ('analyzed', revset.analyze),
-        ('optimized', revset.optimize),
+        ('expanded', lambda tree: revsetlang.expandaliases(ui, tree)),
+        ('concatenated', revsetlang.foldconcat),
+        ('analyzed', revsetlang.analyze),
+        ('optimized', revsetlang.optimize),
     ]
     if opts['no_optimized']:
         stages = stages[:-1]
@@ -1826,13 +1827,13 @@
 
     treebystage = {}
     printedtree = None
-    tree = revset.parse(expr, lookup=repo.__contains__)
+    tree = revsetlang.parse(expr, lookup=repo.__contains__)
     for n, f in stages:
         treebystage[n] = tree = f(tree)
         if n in showalways or (n in showchanged and tree != printedtree):
             if opts['show_stage'] or n != 'parsed':
                 ui.write(("* %s:\n") % n)
-            ui.write(revset.prettyformat(tree), "\n")
+            ui.write(revsetlang.prettyformat(tree), "\n")
             printedtree = tree
 
     if opts['verify_optimized']:
--- a/mercurial/hgweb/webcommands.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/mercurial/hgweb/webcommands.py	Sun Feb 19 18:19:33 2017 +0900
@@ -32,6 +32,7 @@
     error,
     graphmod,
     revset,
+    revsetlang,
     scmutil,
     smartset,
     templatefilters,
@@ -239,20 +240,20 @@
 
         revdef = 'reverse(%s)' % query
         try:
-            tree = revset.parse(revdef)
+            tree = revsetlang.parse(revdef)
         except error.ParseError:
             # can't parse to a revset tree
             return MODE_KEYWORD, query
 
-        if revset.depth(tree) <= 2:
+        if revsetlang.depth(tree) <= 2:
             # no revset syntax used
             return MODE_KEYWORD, query
 
         if any((token, (value or '')[:3]) == ('string', 're:')
-                    for token, value, pos in revset.tokenize(revdef)):
+               for token, value, pos in revsetlang.tokenize(revdef)):
             return MODE_KEYWORD, query
 
-        funcsused = revset.funcsused(tree)
+        funcsused = revsetlang.funcsused(tree)
         if not funcsused.issubset(revset.safesymbols):
             return MODE_KEYWORD, query
 
--- a/mercurial/localrepo.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/mercurial/localrepo.py	Sun Feb 19 18:19:33 2017 +0900
@@ -50,6 +50,7 @@
     pushkey,
     repoview,
     revset,
+    revsetlang,
     scmutil,
     store,
     subrepo,
@@ -573,7 +574,7 @@
         '''Find revisions matching a revset.
 
         The revset is specified as a string ``expr`` that may contain
-        %-formatting to escape certain types. See ``revset.formatspec``.
+        %-formatting to escape certain types. See ``revsetlang.formatspec``.
 
         Revset aliases from the configuration are not expanded. To expand
         user aliases, consider calling ``scmutil.revrange()``.
@@ -581,7 +582,7 @@
         Returns a revset.abstractsmartset, which is a list-like interface
         that contains integer revisions.
         '''
-        expr = revset.formatspec(expr, *args)
+        expr = revsetlang.formatspec(expr, *args)
         m = revset.match(None, expr)
         return m(self)
 
--- a/mercurial/revset.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/mercurial/revset.py	Sun Feb 19 18:19:33 2017 +0900
@@ -9,7 +9,6 @@
 
 import heapq
 import re
-import string
 
 from .i18n import _
 from . import (
@@ -20,16 +19,29 @@
     match as matchmod,
     node,
     obsolete as obsmod,
-    parser,
     pathutil,
     phases,
-    pycompat,
     registrar,
     repoview,
+    revsetlang,
     smartset,
     util,
 )
 
+# helpers for processing parsed tree
+getsymbol = revsetlang.getsymbol
+getstring = revsetlang.getstring
+getinteger = revsetlang.getinteger
+getlist = revsetlang.getlist
+getrange = revsetlang.getrange
+getargs = revsetlang.getargs
+getargsdict = revsetlang.getargsdict
+
+# constants used as an argument of match() and matchany()
+anyorder = revsetlang.anyorder
+defineorder = revsetlang.defineorder
+followorder = revsetlang.followorder
+
 baseset = smartset.baseset
 generatorset = smartset.generatorset
 spanset = smartset.spanset
@@ -152,213 +164,8 @@
     revs.sort()
     return revs
 
-elements = {
-    # token-type: binding-strength, primary, prefix, infix, suffix
-    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
-    "##": (20, None, None, ("_concat", 20), None),
-    "~": (18, None, None, ("ancestor", 18), None),
-    "^": (18, None, None, ("parent", 18), "parentpost"),
-    "-": (5, None, ("negate", 19), ("minus", 5), None),
-    "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
-    "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
-    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
-    "not": (10, None, ("not", 10), None, None),
-    "!": (10, None, ("not", 10), None, None),
-    "and": (5, None, None, ("and", 5), None),
-    "&": (5, None, None, ("and", 5), None),
-    "%": (5, None, None, ("only", 5), "onlypost"),
-    "or": (4, None, None, ("or", 4), None),
-    "|": (4, None, None, ("or", 4), None),
-    "+": (4, None, None, ("or", 4), None),
-    "=": (3, None, None, ("keyvalue", 3), None),
-    ",": (2, None, None, ("list", 2), None),
-    ")": (0, None, None, None, None),
-    "symbol": (0, "symbol", None, None, None),
-    "string": (0, "string", None, None, None),
-    "end": (0, None, None, None, None),
-}
-
-keywords = set(['and', 'or', 'not'])
-
-# default set of valid characters for the initial letter of symbols
-_syminitletters = set(
-    string.ascii_letters +
-    string.digits + pycompat.sysstr('._@')) | set(map(chr, xrange(128, 256)))
-
-# default set of valid characters for non-initial letters of symbols
-_symletters = _syminitletters | set(pycompat.sysstr('-/'))
-
-def tokenize(program, lookup=None, syminitletters=None, symletters=None):
-    '''
-    Parse a revset statement into a stream of tokens
-
-    ``syminitletters`` is the set of valid characters for the initial
-    letter of symbols.
-
-    By default, character ``c`` is recognized as valid for initial
-    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
-
-    ``symletters`` is the set of valid characters for non-initial
-    letters of symbols.
-
-    By default, character ``c`` is recognized as valid for non-initial
-    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
-
-    Check that @ is a valid unquoted token character (issue3686):
-    >>> list(tokenize("@::"))
-    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
-
-    '''
-    if syminitletters is None:
-        syminitletters = _syminitletters
-    if symletters is None:
-        symletters = _symletters
-
-    if program and lookup:
-        # attempt to parse old-style ranges first to deal with
-        # things like old-tag which contain query metacharacters
-        parts = program.split(':', 1)
-        if all(lookup(sym) for sym in parts if sym):
-            if parts[0]:
-                yield ('symbol', parts[0], 0)
-            if len(parts) > 1:
-                s = len(parts[0])
-                yield (':', None, s)
-                if parts[1]:
-                    yield ('symbol', parts[1], s + 1)
-            yield ('end', None, len(program))
-            return
-
-    pos, l = 0, len(program)
-    while pos < l:
-        c = program[pos]
-        if c.isspace(): # skip inter-token whitespace
-            pass
-        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
-            yield ('::', None, pos)
-            pos += 1 # skip ahead
-        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
-            yield ('..', None, pos)
-            pos += 1 # skip ahead
-        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
-            yield ('##', None, pos)
-            pos += 1 # skip ahead
-        elif c in "():=,-|&+!~^%": # handle simple operators
-            yield (c, None, pos)
-        elif (c in '"\'' or c == 'r' and
-              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
-            if c == 'r':
-                pos += 1
-                c = program[pos]
-                decode = lambda x: x
-            else:
-                decode = parser.unescapestr
-            pos += 1
-            s = pos
-            while pos < l: # find closing quote
-                d = program[pos]
-                if d == '\\': # skip over escaped characters
-                    pos += 2
-                    continue
-                if d == c:
-                    yield ('string', decode(program[s:pos]), s)
-                    break
-                pos += 1
-            else:
-                raise error.ParseError(_("unterminated string"), s)
-        # gather up a symbol/keyword
-        elif c in syminitletters:
-            s = pos
-            pos += 1
-            while pos < l: # find end of symbol
-                d = program[pos]
-                if d not in symletters:
-                    break
-                if d == '.' and program[pos - 1] == '.': # special case for ..
-                    pos -= 1
-                    break
-                pos += 1
-            sym = program[s:pos]
-            if sym in keywords: # operator keywords
-                yield (sym, None, s)
-            elif '-' in sym:
-                # some jerk gave us foo-bar-baz, try to check if it's a symbol
-                if lookup and lookup(sym):
-                    # looks like a real symbol
-                    yield ('symbol', sym, s)
-                else:
-                    # looks like an expression
-                    parts = sym.split('-')
-                    for p in parts[:-1]:
-                        if p: # possible consecutive -
-                            yield ('symbol', p, s)
-                        s += len(p)
-                        yield ('-', None, pos)
-                        s += 1
-                    if parts[-1]: # possible trailing -
-                        yield ('symbol', parts[-1], s)
-            else:
-                yield ('symbol', sym, s)
-            pos -= 1
-        else:
-            raise error.ParseError(_("syntax error in revset '%s'") %
-                                   program, pos)
-        pos += 1
-    yield ('end', None, pos)
-
 # helpers
 
-_notset = object()
-
-def getsymbol(x):
-    if x and x[0] == 'symbol':
-        return x[1]
-    raise error.ParseError(_('not a symbol'))
-
-def getstring(x, err):
-    if x and (x[0] == 'string' or x[0] == 'symbol'):
-        return x[1]
-    raise error.ParseError(err)
-
-def getinteger(x, err, default=_notset):
-    if not x and default is not _notset:
-        return default
-    try:
-        return int(getstring(x, err))
-    except ValueError:
-        raise error.ParseError(err)
-
-def getlist(x):
-    if not x:
-        return []
-    if x[0] == 'list':
-        return list(x[1:])
-    return [x]
-
-def getrange(x, err):
-    if not x:
-        raise error.ParseError(err)
-    op = x[0]
-    if op == 'range':
-        return x[1], x[2]
-    elif op == 'rangepre':
-        return None, x[1]
-    elif op == 'rangepost':
-        return x[1], None
-    elif op == 'rangeall':
-        return None, None
-    raise error.ParseError(err)
-
-def getargs(x, min, max, err):
-    l = getlist(x)
-    if len(l) < min or (max >= 0 and len(l) > max):
-        raise error.ParseError(err)
-    return l
-
-def getargsdict(x, funcname, keys):
-    return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
-                                keyvaluenode='keyvalue', keynode='symbol')
-
 def getset(repo, subset, x):
     if not x:
         raise error.ParseError(_("missing argument"))
@@ -2412,350 +2219,6 @@
     "parentpost": parentpost,
 }
 
-# Constants for ordering requirement, used in _analyze():
-#
-# If 'define', any nested functions and operations can change the ordering of
-# the entries in the set. If 'follow', any nested functions and operations
-# should take the ordering specified by the first operand to the '&' operator.
-#
-# For instance,
-#
-#   X & (Y | Z)
-#   ^   ^^^^^^^
-#   |   follow
-#   define
-#
-# will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
-# of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
-#
-# 'any' means the order doesn't matter. For instance,
-#
-#   X & !Y
-#        ^
-#        any
-#
-# 'y()' can either enforce its ordering requirement or take the ordering
-# specified by 'x()' because 'not()' doesn't care the order.
-#
-# Transition of ordering requirement:
-#
-# 1. starts with 'define'
-# 2. shifts to 'follow' by 'x & y'
-# 3. changes back to 'define' on function call 'f(x)' or function-like
-#    operation 'x (f) y' because 'f' may have its own ordering requirement
-#    for 'x' and 'y' (e.g. 'first(x)')
-#
-anyorder = 'any'        # don't care the order
-defineorder = 'define'  # should define the order
-followorder = 'follow'  # must follow the current order
-
-# transition table for 'x & y', from the current expression 'x' to 'y'
-_tofolloworder = {
-    anyorder: anyorder,
-    defineorder: followorder,
-    followorder: followorder,
-}
-
-def _matchonly(revs, bases):
-    """
-    >>> f = lambda *args: _matchonly(*map(parse, args))
-    >>> f('ancestors(A)', 'not ancestors(B)')
-    ('list', ('symbol', 'A'), ('symbol', 'B'))
-    """
-    if (revs is not None
-        and revs[0] == 'func'
-        and getsymbol(revs[1]) == 'ancestors'
-        and bases is not None
-        and bases[0] == 'not'
-        and bases[1][0] == 'func'
-        and getsymbol(bases[1][1]) == 'ancestors'):
-        return ('list', revs[2], bases[1][2])
-
-def _fixops(x):
-    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
-    handled well by our simple top-down parser"""
-    if not isinstance(x, tuple):
-        return x
-
-    op = x[0]
-    if op == 'parent':
-        # x^:y means (x^) : y, not x ^ (:y)
-        # x^:  means (x^) :,   not x ^ (:)
-        post = ('parentpost', x[1])
-        if x[2][0] == 'dagrangepre':
-            return _fixops(('dagrange', post, x[2][1]))
-        elif x[2][0] == 'rangepre':
-            return _fixops(('range', post, x[2][1]))
-        elif x[2][0] == 'rangeall':
-            return _fixops(('rangepost', post))
-    elif op == 'or':
-        # make number of arguments deterministic:
-        # x + y + z -> (or x y z) -> (or (list x y z))
-        return (op, _fixops(('list',) + x[1:]))
-
-    return (op,) + tuple(_fixops(y) for y in x[1:])
-
-def _analyze(x, order):
-    if x is None:
-        return x
-
-    op = x[0]
-    if op == 'minus':
-        return _analyze(('and', x[1], ('not', x[2])), order)
-    elif op == 'only':
-        t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
-        return _analyze(t, order)
-    elif op == 'onlypost':
-        return _analyze(('func', ('symbol', 'only'), x[1]), order)
-    elif op == 'dagrangepre':
-        return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
-    elif op == 'dagrangepost':
-        return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
-    elif op == 'negate':
-        s = getstring(x[1], _("can't negate that"))
-        return _analyze(('string', '-' + s), order)
-    elif op in ('string', 'symbol'):
-        return x
-    elif op == 'and':
-        ta = _analyze(x[1], order)
-        tb = _analyze(x[2], _tofolloworder[order])
-        return (op, ta, tb, order)
-    elif op == 'or':
-        return (op, _analyze(x[1], order), order)
-    elif op == 'not':
-        return (op, _analyze(x[1], anyorder), order)
-    elif op == 'rangeall':
-        return (op, None, order)
-    elif op in ('rangepre', 'rangepost', 'parentpost'):
-        return (op, _analyze(x[1], defineorder), order)
-    elif op == 'group':
-        return _analyze(x[1], order)
-    elif op in ('dagrange', 'range', 'parent', 'ancestor'):
-        ta = _analyze(x[1], defineorder)
-        tb = _analyze(x[2], defineorder)
-        return (op, ta, tb, order)
-    elif op == 'list':
-        return (op,) + tuple(_analyze(y, order) for y in x[1:])
-    elif op == 'keyvalue':
-        return (op, x[1], _analyze(x[2], order))
-    elif op == 'func':
-        f = getsymbol(x[1])
-        d = defineorder
-        if f == 'present':
-            # 'present(set)' is known to return the argument set with no
-            # modification, so forward the current order to its argument
-            d = order
-        return (op, x[1], _analyze(x[2], d), order)
-    raise ValueError('invalid operator %r' % op)
-
-def analyze(x, order=defineorder):
-    """Transform raw parsed tree to evaluatable tree which can be fed to
-    optimize() or getset()
-
-    All pseudo operations should be mapped to real operations or functions
-    defined in methods or symbols table respectively.
-
-    'order' specifies how the current expression 'x' is ordered (see the
-    constants defined above.)
-    """
-    return _analyze(x, order)
-
-def _optimize(x, small):
-    if x is None:
-        return 0, x
-
-    smallbonus = 1
-    if small:
-        smallbonus = .5
-
-    op = x[0]
-    if op in ('string', 'symbol'):
-        return smallbonus, x # single revisions are small
-    elif op == 'and':
-        wa, ta = _optimize(x[1], True)
-        wb, tb = _optimize(x[2], True)
-        order = x[3]
-        w = min(wa, wb)
-
-        # (::x and not ::y)/(not ::y and ::x) have a fast path
-        tm = _matchonly(ta, tb) or _matchonly(tb, ta)
-        if tm:
-            return w, ('func', ('symbol', 'only'), tm, order)
-
-        if tb is not None and tb[0] == 'not':
-            return wa, ('difference', ta, tb[1], order)
-
-        if wa > wb:
-            return w, (op, tb, ta, order)
-        return w, (op, ta, tb, order)
-    elif op == 'or':
-        # fast path for machine-generated expression, that is likely to have
-        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
-        order = x[2]
-        ws, ts, ss = [], [], []
-        def flushss():
-            if not ss:
-                return
-            if len(ss) == 1:
-                w, t = ss[0]
-            else:
-                s = '\0'.join(t[1] for w, t in ss)
-                y = ('func', ('symbol', '_list'), ('string', s), order)
-                w, t = _optimize(y, False)
-            ws.append(w)
-            ts.append(t)
-            del ss[:]
-        for y in getlist(x[1]):
-            w, t = _optimize(y, False)
-            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
-                ss.append((w, t))
-                continue
-            flushss()
-            ws.append(w)
-            ts.append(t)
-        flushss()
-        if len(ts) == 1:
-            return ws[0], ts[0] # 'or' operation is fully optimized out
-        # we can't reorder trees by weight because it would change the order.
-        # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
-        #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
-        return max(ws), (op, ('list',) + tuple(ts), order)
-    elif op == 'not':
-        # Optimize not public() to _notpublic() because we have a fast version
-        if x[1][:3] == ('func', ('symbol', 'public'), None):
-            order = x[1][3]
-            newsym = ('func', ('symbol', '_notpublic'), None, order)
-            o = _optimize(newsym, not small)
-            return o[0], o[1]
-        else:
-            o = _optimize(x[1], not small)
-            order = x[2]
-            return o[0], (op, o[1], order)
-    elif op == 'rangeall':
-        return smallbonus, x
-    elif op in ('rangepre', 'rangepost', 'parentpost'):
-        o = _optimize(x[1], small)
-        order = x[2]
-        return o[0], (op, o[1], order)
-    elif op in ('dagrange', 'range', 'parent', 'ancestor'):
-        wa, ta = _optimize(x[1], small)
-        wb, tb = _optimize(x[2], small)
-        order = x[3]
-        return wa + wb, (op, ta, tb, order)
-    elif op == 'list':
-        ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
-        return sum(ws), (op,) + ts
-    elif op == 'keyvalue':
-        w, t = _optimize(x[2], small)
-        return w, (op, x[1], t)
-    elif op == 'func':
-        f = getsymbol(x[1])
-        wa, ta = _optimize(x[2], small)
-        if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
-                 'keyword', 'outgoing', 'user', 'destination'):
-            w = 10 # slow
-        elif f in ('modifies', 'adds', 'removes'):
-            w = 30 # slower
-        elif f == "contains":
-            w = 100 # very slow
-        elif f == "ancestor":
-            w = 1 * smallbonus
-        elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
-            w = 0
-        elif f == "sort":
-            w = 10 # assume most sorts look at changelog
-        else:
-            w = 1
-        order = x[3]
-        return w + wa, (op, x[1], ta, order)
-    raise ValueError('invalid operator %r' % op)
-
-def optimize(tree):
-    """Optimize evaluatable tree
-
-    All pseudo operations should be transformed beforehand.
-    """
-    _weight, newtree = _optimize(tree, small=True)
-    return newtree
-
-# the set of valid characters for the initial letter of symbols in
-# alias declarations and definitions
-_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
-
-def _parsewith(spec, lookup=None, syminitletters=None):
-    """Generate a parse tree of given spec with given tokenizing options
-
-    >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
-    ('func', ('symbol', 'foo'), ('symbol', '$1'))
-    >>> _parsewith('$1')
-    Traceback (most recent call last):
-      ...
-    ParseError: ("syntax error in revset '$1'", 0)
-    >>> _parsewith('foo bar')
-    Traceback (most recent call last):
-      ...
-    ParseError: ('invalid token', 4)
-    """
-    p = parser.parser(elements)
-    tree, pos = p.parse(tokenize(spec, lookup=lookup,
-                                 syminitletters=syminitletters))
-    if pos != len(spec):
-        raise error.ParseError(_('invalid token'), pos)
-    return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
-
-class _aliasrules(parser.basealiasrules):
-    """Parsing and expansion rule set of revset aliases"""
-    _section = _('revset alias')
-
-    @staticmethod
-    def _parse(spec):
-        """Parse alias declaration/definition ``spec``
-
-        This allows symbol names to use also ``$`` as an initial letter
-        (for backward compatibility), and callers of this function should
-        examine whether ``$`` is used also for unexpected symbols or not.
-        """
-        return _parsewith(spec, syminitletters=_aliassyminitletters)
-
-    @staticmethod
-    def _trygetfunc(tree):
-        if tree[0] == 'func' and tree[1][0] == 'symbol':
-            return tree[1][1], getlist(tree[2])
-
-def expandaliases(ui, tree):
-    aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
-    tree = _aliasrules.expand(aliases, tree)
-    # warn about problematic (but not referred) aliases
-    for name, alias in sorted(aliases.iteritems()):
-        if alias.error and not alias.warned:
-            ui.warn(_('warning: %s\n') % (alias.error))
-            alias.warned = True
-    return tree
-
-def foldconcat(tree):
-    """Fold elements to be concatenated by `##`
-    """
-    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
-        return tree
-    if tree[0] == '_concat':
-        pending = [tree]
-        l = []
-        while pending:
-            e = pending.pop()
-            if e[0] == '_concat':
-                pending.extend(reversed(e[1:]))
-            elif e[0] in ('string', 'symbol'):
-                l.append(e[1])
-            else:
-                msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
-                raise error.ParseError(msg)
-        return ('string', ''.join(l))
-    else:
-        return tuple(foldconcat(t) for t in tree)
-
-def parse(spec, lookup=None):
-    return _parsewith(spec, lookup=lookup)
-
 def posttreebuilthook(tree, repo):
     # hook for extensions to execute code on the optimized tree
     pass
@@ -2785,15 +2248,16 @@
     if repo:
         lookup = repo.__contains__
     if len(specs) == 1:
-        tree = parse(specs[0], lookup)
+        tree = revsetlang.parse(specs[0], lookup)
     else:
-        tree = ('or', ('list',) + tuple(parse(s, lookup) for s in specs))
+        tree = ('or',
+                ('list',) + tuple(revsetlang.parse(s, lookup) for s in specs))
 
     if ui:
-        tree = expandaliases(ui, tree)
-    tree = foldconcat(tree)
-    tree = analyze(tree, order)
-    tree = optimize(tree)
+        tree = revsetlang.expandaliases(ui, tree)
+    tree = revsetlang.foldconcat(tree)
+    tree = revsetlang.analyze(tree, order)
+    tree = revsetlang.optimize(tree)
     posttreebuilthook(tree, repo)
     return makematcher(tree)
 
@@ -2809,121 +2273,6 @@
         return result
     return mfunc
 
-def formatspec(expr, *args):
-    '''
-    This is a convenience function for using revsets internally, and
-    escapes arguments appropriately. Aliases are intentionally ignored
-    so that intended expression behavior isn't accidentally subverted.
-
-    Supported arguments:
-
-    %r = revset expression, parenthesized
-    %d = int(arg), no quoting
-    %s = string(arg), escaped and single-quoted
-    %b = arg.branch(), escaped and single-quoted
-    %n = hex(arg), single-quoted
-    %% = a literal '%'
-
-    Prefixing the type with 'l' specifies a parenthesized list of that type.
-
-    >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
-    '(10 or 11):: and ((this()) or (that()))'
-    >>> formatspec('%d:: and not %d::', 10, 20)
-    '10:: and not 20::'
-    >>> formatspec('%ld or %ld', [], [1])
-    "_list('') or 1"
-    >>> formatspec('keyword(%s)', 'foo\\xe9')
-    "keyword('foo\\\\xe9')"
-    >>> b = lambda: 'default'
-    >>> b.branch = b
-    >>> formatspec('branch(%b)', b)
-    "branch('default')"
-    >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
-    "root(_list('a\\x00b\\x00c\\x00d'))"
-    '''
-
-    def quote(s):
-        return repr(str(s))
-
-    def argtype(c, arg):
-        if c == 'd':
-            return str(int(arg))
-        elif c == 's':
-            return quote(arg)
-        elif c == 'r':
-            parse(arg) # make sure syntax errors are confined
-            return '(%s)' % arg
-        elif c == 'n':
-            return quote(node.hex(arg))
-        elif c == 'b':
-            return quote(arg.branch())
-
-    def listexp(s, t):
-        l = len(s)
-        if l == 0:
-            return "_list('')"
-        elif l == 1:
-            return argtype(t, s[0])
-        elif t == 'd':
-            return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
-        elif t == 's':
-            return "_list('%s')" % "\0".join(s)
-        elif t == 'n':
-            return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
-        elif t == 'b':
-            return "_list('%s')" % "\0".join(a.branch() for a in s)
-
-        m = l // 2
-        return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
-
-    ret = ''
-    pos = 0
-    arg = 0
-    while pos < len(expr):
-        c = expr[pos]
-        if c == '%':
-            pos += 1
-            d = expr[pos]
-            if d == '%':
-                ret += d
-            elif d in 'dsnbr':
-                ret += argtype(d, args[arg])
-                arg += 1
-            elif d == 'l':
-                # a list of some type
-                pos += 1
-                d = expr[pos]
-                ret += listexp(list(args[arg]), d)
-                arg += 1
-            else:
-                raise error.Abort(_('unexpected revspec format character %s')
-                                  % d)
-        else:
-            ret += c
-        pos += 1
-
-    return ret
-
-def prettyformat(tree):
-    return parser.prettyformat(tree, ('string', 'symbol'))
-
-def depth(tree):
-    if isinstance(tree, tuple):
-        return max(map(depth, tree)) + 1
-    else:
-        return 0
-
-def funcsused(tree):
-    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
-        return set()
-    else:
-        funcs = set()
-        for s in tree[1:]:
-            funcs |= funcsused(s)
-        if tree[0] == 'func':
-            funcs.add(tree[1][1])
-        return funcs
-
 def loadpredicate(ui, extname, registrarobj):
     """Load revset predicates from specified registrarobj
     """
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mercurial/revsetlang.py	Sun Feb 19 18:19:33 2017 +0900
@@ -0,0 +1,684 @@
+# revsetlang.py - parser, tokenizer and utility for revision set language
+#
+# Copyright 2010 Matt Mackall <mpm@selenic.com>
+#
+# This software may be used and distributed according to the terms of the
+# GNU General Public License version 2 or any later version.
+
+from __future__ import absolute_import
+
+import string
+
+from .i18n import _
+from . import (
+    error,
+    node,
+    parser,
+    pycompat,
+)
+
+elements = {
+    # token-type: binding-strength, primary, prefix, infix, suffix
+    "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
+    "##": (20, None, None, ("_concat", 20), None),
+    "~": (18, None, None, ("ancestor", 18), None),
+    "^": (18, None, None, ("parent", 18), "parentpost"),
+    "-": (5, None, ("negate", 19), ("minus", 5), None),
+    "::": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
+    "..": (17, None, ("dagrangepre", 17), ("dagrange", 17), "dagrangepost"),
+    ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
+    "not": (10, None, ("not", 10), None, None),
+    "!": (10, None, ("not", 10), None, None),
+    "and": (5, None, None, ("and", 5), None),
+    "&": (5, None, None, ("and", 5), None),
+    "%": (5, None, None, ("only", 5), "onlypost"),
+    "or": (4, None, None, ("or", 4), None),
+    "|": (4, None, None, ("or", 4), None),
+    "+": (4, None, None, ("or", 4), None),
+    "=": (3, None, None, ("keyvalue", 3), None),
+    ",": (2, None, None, ("list", 2), None),
+    ")": (0, None, None, None, None),
+    "symbol": (0, "symbol", None, None, None),
+    "string": (0, "string", None, None, None),
+    "end": (0, None, None, None, None),
+}
+
+keywords = set(['and', 'or', 'not'])
+
+# default set of valid characters for the initial letter of symbols
+_syminitletters = set(
+    string.ascii_letters +
+    string.digits + pycompat.sysstr('._@')) | set(map(chr, xrange(128, 256)))
+
+# default set of valid characters for non-initial letters of symbols
+_symletters = _syminitletters | set(pycompat.sysstr('-/'))
+
+def tokenize(program, lookup=None, syminitletters=None, symletters=None):
+    '''
+    Parse a revset statement into a stream of tokens
+
+    ``syminitletters`` is the set of valid characters for the initial
+    letter of symbols.
+
+    By default, character ``c`` is recognized as valid for initial
+    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
+
+    ``symletters`` is the set of valid characters for non-initial
+    letters of symbols.
+
+    By default, character ``c`` is recognized as valid for non-initial
+    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
+
+    Check that @ is a valid unquoted token character (issue3686):
+    >>> list(tokenize("@::"))
+    [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
+
+    '''
+    if syminitletters is None:
+        syminitletters = _syminitletters
+    if symletters is None:
+        symletters = _symletters
+
+    if program and lookup:
+        # attempt to parse old-style ranges first to deal with
+        # things like old-tag which contain query metacharacters
+        parts = program.split(':', 1)
+        if all(lookup(sym) for sym in parts if sym):
+            if parts[0]:
+                yield ('symbol', parts[0], 0)
+            if len(parts) > 1:
+                s = len(parts[0])
+                yield (':', None, s)
+                if parts[1]:
+                    yield ('symbol', parts[1], s + 1)
+            yield ('end', None, len(program))
+            return
+
+    pos, l = 0, len(program)
+    while pos < l:
+        c = program[pos]
+        if c.isspace(): # skip inter-token whitespace
+            pass
+        elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
+            yield ('::', None, pos)
+            pos += 1 # skip ahead
+        elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
+            yield ('..', None, pos)
+            pos += 1 # skip ahead
+        elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
+            yield ('##', None, pos)
+            pos += 1 # skip ahead
+        elif c in "():=,-|&+!~^%": # handle simple operators
+            yield (c, None, pos)
+        elif (c in '"\'' or c == 'r' and
+              program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
+            if c == 'r':
+                pos += 1
+                c = program[pos]
+                decode = lambda x: x
+            else:
+                decode = parser.unescapestr
+            pos += 1
+            s = pos
+            while pos < l: # find closing quote
+                d = program[pos]
+                if d == '\\': # skip over escaped characters
+                    pos += 2
+                    continue
+                if d == c:
+                    yield ('string', decode(program[s:pos]), s)
+                    break
+                pos += 1
+            else:
+                raise error.ParseError(_("unterminated string"), s)
+        # gather up a symbol/keyword
+        elif c in syminitletters:
+            s = pos
+            pos += 1
+            while pos < l: # find end of symbol
+                d = program[pos]
+                if d not in symletters:
+                    break
+                if d == '.' and program[pos - 1] == '.': # special case for ..
+                    pos -= 1
+                    break
+                pos += 1
+            sym = program[s:pos]
+            if sym in keywords: # operator keywords
+                yield (sym, None, s)
+            elif '-' in sym:
+                # some jerk gave us foo-bar-baz, try to check if it's a symbol
+                if lookup and lookup(sym):
+                    # looks like a real symbol
+                    yield ('symbol', sym, s)
+                else:
+                    # looks like an expression
+                    parts = sym.split('-')
+                    for p in parts[:-1]:
+                        if p: # possible consecutive -
+                            yield ('symbol', p, s)
+                        s += len(p)
+                        yield ('-', None, pos)
+                        s += 1
+                    if parts[-1]: # possible trailing -
+                        yield ('symbol', parts[-1], s)
+            else:
+                yield ('symbol', sym, s)
+            pos -= 1
+        else:
+            raise error.ParseError(_("syntax error in revset '%s'") %
+                                   program, pos)
+        pos += 1
+    yield ('end', None, pos)
+
+# helpers
+
+_notset = object()
+
+def getsymbol(x):
+    if x and x[0] == 'symbol':
+        return x[1]
+    raise error.ParseError(_('not a symbol'))
+
+def getstring(x, err):
+    if x and (x[0] == 'string' or x[0] == 'symbol'):
+        return x[1]
+    raise error.ParseError(err)
+
+def getinteger(x, err, default=_notset):
+    if not x and default is not _notset:
+        return default
+    try:
+        return int(getstring(x, err))
+    except ValueError:
+        raise error.ParseError(err)
+
+def getlist(x):
+    if not x:
+        return []
+    if x[0] == 'list':
+        return list(x[1:])
+    return [x]
+
+def getrange(x, err):
+    if not x:
+        raise error.ParseError(err)
+    op = x[0]
+    if op == 'range':
+        return x[1], x[2]
+    elif op == 'rangepre':
+        return None, x[1]
+    elif op == 'rangepost':
+        return x[1], None
+    elif op == 'rangeall':
+        return None, None
+    raise error.ParseError(err)
+
+def getargs(x, min, max, err):
+    l = getlist(x)
+    if len(l) < min or (max >= 0 and len(l) > max):
+        raise error.ParseError(err)
+    return l
+
+def getargsdict(x, funcname, keys):
+    return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
+                                keyvaluenode='keyvalue', keynode='symbol')
+
+# Constants for ordering requirement, used in _analyze():
+#
+# If 'define', any nested functions and operations can change the ordering of
+# the entries in the set. If 'follow', any nested functions and operations
+# should take the ordering specified by the first operand to the '&' operator.
+#
+# For instance,
+#
+#   X & (Y | Z)
+#   ^   ^^^^^^^
+#   |   follow
+#   define
+#
+# will be evaluated as 'or(y(x()), z(x()))', where 'x()' can change the order
+# of the entries in the set, but 'y()', 'z()' and 'or()' shouldn't.
+#
+# 'any' means the order doesn't matter. For instance,
+#
+#   X & !Y
+#        ^
+#        any
+#
+# 'y()' can either enforce its ordering requirement or take the ordering
+# specified by 'x()' because 'not()' doesn't care the order.
+#
+# Transition of ordering requirement:
+#
+# 1. starts with 'define'
+# 2. shifts to 'follow' by 'x & y'
+# 3. changes back to 'define' on function call 'f(x)' or function-like
+#    operation 'x (f) y' because 'f' may have its own ordering requirement
+#    for 'x' and 'y' (e.g. 'first(x)')
+#
+anyorder = 'any'        # don't care the order
+defineorder = 'define'  # should define the order
+followorder = 'follow'  # must follow the current order
+
+# transition table for 'x & y', from the current expression 'x' to 'y'
+_tofolloworder = {
+    anyorder: anyorder,
+    defineorder: followorder,
+    followorder: followorder,
+}
+
+def _matchonly(revs, bases):
+    """
+    >>> f = lambda *args: _matchonly(*map(parse, args))
+    >>> f('ancestors(A)', 'not ancestors(B)')
+    ('list', ('symbol', 'A'), ('symbol', 'B'))
+    """
+    if (revs is not None
+        and revs[0] == 'func'
+        and getsymbol(revs[1]) == 'ancestors'
+        and bases is not None
+        and bases[0] == 'not'
+        and bases[1][0] == 'func'
+        and getsymbol(bases[1][1]) == 'ancestors'):
+        return ('list', revs[2], bases[1][2])
+
+def _fixops(x):
+    """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
+    handled well by our simple top-down parser"""
+    if not isinstance(x, tuple):
+        return x
+
+    op = x[0]
+    if op == 'parent':
+        # x^:y means (x^) : y, not x ^ (:y)
+        # x^:  means (x^) :,   not x ^ (:)
+        post = ('parentpost', x[1])
+        if x[2][0] == 'dagrangepre':
+            return _fixops(('dagrange', post, x[2][1]))
+        elif x[2][0] == 'rangepre':
+            return _fixops(('range', post, x[2][1]))
+        elif x[2][0] == 'rangeall':
+            return _fixops(('rangepost', post))
+    elif op == 'or':
+        # make number of arguments deterministic:
+        # x + y + z -> (or x y z) -> (or (list x y z))
+        return (op, _fixops(('list',) + x[1:]))
+
+    return (op,) + tuple(_fixops(y) for y in x[1:])
+
+def _analyze(x, order):
+    if x is None:
+        return x
+
+    op = x[0]
+    if op == 'minus':
+        return _analyze(('and', x[1], ('not', x[2])), order)
+    elif op == 'only':
+        t = ('func', ('symbol', 'only'), ('list', x[1], x[2]))
+        return _analyze(t, order)
+    elif op == 'onlypost':
+        return _analyze(('func', ('symbol', 'only'), x[1]), order)
+    elif op == 'dagrangepre':
+        return _analyze(('func', ('symbol', 'ancestors'), x[1]), order)
+    elif op == 'dagrangepost':
+        return _analyze(('func', ('symbol', 'descendants'), x[1]), order)
+    elif op == 'negate':
+        s = getstring(x[1], _("can't negate that"))
+        return _analyze(('string', '-' + s), order)
+    elif op in ('string', 'symbol'):
+        return x
+    elif op == 'and':
+        ta = _analyze(x[1], order)
+        tb = _analyze(x[2], _tofolloworder[order])
+        return (op, ta, tb, order)
+    elif op == 'or':
+        return (op, _analyze(x[1], order), order)
+    elif op == 'not':
+        return (op, _analyze(x[1], anyorder), order)
+    elif op == 'rangeall':
+        return (op, None, order)
+    elif op in ('rangepre', 'rangepost', 'parentpost'):
+        return (op, _analyze(x[1], defineorder), order)
+    elif op == 'group':
+        return _analyze(x[1], order)
+    elif op in ('dagrange', 'range', 'parent', 'ancestor'):
+        ta = _analyze(x[1], defineorder)
+        tb = _analyze(x[2], defineorder)
+        return (op, ta, tb, order)
+    elif op == 'list':
+        return (op,) + tuple(_analyze(y, order) for y in x[1:])
+    elif op == 'keyvalue':
+        return (op, x[1], _analyze(x[2], order))
+    elif op == 'func':
+        f = getsymbol(x[1])
+        d = defineorder
+        if f == 'present':
+            # 'present(set)' is known to return the argument set with no
+            # modification, so forward the current order to its argument
+            d = order
+        return (op, x[1], _analyze(x[2], d), order)
+    raise ValueError('invalid operator %r' % op)
+
+def analyze(x, order=defineorder):
+    """Transform raw parsed tree to evaluatable tree which can be fed to
+    optimize() or getset()
+
+    All pseudo operations should be mapped to real operations or functions
+    defined in methods or symbols table respectively.
+
+    'order' specifies how the current expression 'x' is ordered (see the
+    constants defined above.)
+    """
+    return _analyze(x, order)
+
+def _optimize(x, small):
+    if x is None:
+        return 0, x
+
+    smallbonus = 1
+    if small:
+        smallbonus = .5
+
+    op = x[0]
+    if op in ('string', 'symbol'):
+        return smallbonus, x # single revisions are small
+    elif op == 'and':
+        wa, ta = _optimize(x[1], True)
+        wb, tb = _optimize(x[2], True)
+        order = x[3]
+        w = min(wa, wb)
+
+        # (::x and not ::y)/(not ::y and ::x) have a fast path
+        tm = _matchonly(ta, tb) or _matchonly(tb, ta)
+        if tm:
+            return w, ('func', ('symbol', 'only'), tm, order)
+
+        if tb is not None and tb[0] == 'not':
+            return wa, ('difference', ta, tb[1], order)
+
+        if wa > wb:
+            return w, (op, tb, ta, order)
+        return w, (op, ta, tb, order)
+    elif op == 'or':
+        # fast path for machine-generated expression, that is likely to have
+        # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
+        order = x[2]
+        ws, ts, ss = [], [], []
+        def flushss():
+            if not ss:
+                return
+            if len(ss) == 1:
+                w, t = ss[0]
+            else:
+                s = '\0'.join(t[1] for w, t in ss)
+                y = ('func', ('symbol', '_list'), ('string', s), order)
+                w, t = _optimize(y, False)
+            ws.append(w)
+            ts.append(t)
+            del ss[:]
+        for y in getlist(x[1]):
+            w, t = _optimize(y, False)
+            if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
+                ss.append((w, t))
+                continue
+            flushss()
+            ws.append(w)
+            ts.append(t)
+        flushss()
+        if len(ts) == 1:
+            return ws[0], ts[0] # 'or' operation is fully optimized out
+        # we can't reorder trees by weight because it would change the order.
+        # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a")
+        #   ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0]))
+        return max(ws), (op, ('list',) + tuple(ts), order)
+    elif op == 'not':
+        # Optimize not public() to _notpublic() because we have a fast version
+        if x[1][:3] == ('func', ('symbol', 'public'), None):
+            order = x[1][3]
+            newsym = ('func', ('symbol', '_notpublic'), None, order)
+            o = _optimize(newsym, not small)
+            return o[0], o[1]
+        else:
+            o = _optimize(x[1], not small)
+            order = x[2]
+            return o[0], (op, o[1], order)
+    elif op == 'rangeall':
+        return smallbonus, x
+    elif op in ('rangepre', 'rangepost', 'parentpost'):
+        o = _optimize(x[1], small)
+        order = x[2]
+        return o[0], (op, o[1], order)
+    elif op in ('dagrange', 'range', 'parent', 'ancestor'):
+        wa, ta = _optimize(x[1], small)
+        wb, tb = _optimize(x[2], small)
+        order = x[3]
+        return wa + wb, (op, ta, tb, order)
+    elif op == 'list':
+        ws, ts = zip(*(_optimize(y, small) for y in x[1:]))
+        return sum(ws), (op,) + ts
+    elif op == 'keyvalue':
+        w, t = _optimize(x[2], small)
+        return w, (op, x[1], t)
+    elif op == 'func':
+        f = getsymbol(x[1])
+        wa, ta = _optimize(x[2], small)
+        if f in ('author', 'branch', 'closed', 'date', 'desc', 'file', 'grep',
+                 'keyword', 'outgoing', 'user', 'destination'):
+            w = 10 # slow
+        elif f in ('modifies', 'adds', 'removes'):
+            w = 30 # slower
+        elif f == "contains":
+            w = 100 # very slow
+        elif f == "ancestor":
+            w = 1 * smallbonus
+        elif f in ('reverse', 'limit', 'first', 'wdir', '_intlist'):
+            w = 0
+        elif f == "sort":
+            w = 10 # assume most sorts look at changelog
+        else:
+            w = 1
+        order = x[3]
+        return w + wa, (op, x[1], ta, order)
+    raise ValueError('invalid operator %r' % op)
+
+def optimize(tree):
+    """Optimize evaluatable tree
+
+    All pseudo operations should be transformed beforehand.
+    """
+    _weight, newtree = _optimize(tree, small=True)
+    return newtree
+
+# the set of valid characters for the initial letter of symbols in
+# alias declarations and definitions
+_aliassyminitletters = _syminitletters | set(pycompat.sysstr('$'))
+
+def _parsewith(spec, lookup=None, syminitletters=None):
+    """Generate a parse tree of given spec with given tokenizing options
+
+    >>> _parsewith('foo($1)', syminitletters=_aliassyminitletters)
+    ('func', ('symbol', 'foo'), ('symbol', '$1'))
+    >>> _parsewith('$1')
+    Traceback (most recent call last):
+      ...
+    ParseError: ("syntax error in revset '$1'", 0)
+    >>> _parsewith('foo bar')
+    Traceback (most recent call last):
+      ...
+    ParseError: ('invalid token', 4)
+    """
+    p = parser.parser(elements)
+    tree, pos = p.parse(tokenize(spec, lookup=lookup,
+                                 syminitletters=syminitletters))
+    if pos != len(spec):
+        raise error.ParseError(_('invalid token'), pos)
+    return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
+
+class _aliasrules(parser.basealiasrules):
+    """Parsing and expansion rule set of revset aliases"""
+    _section = _('revset alias')
+
+    @staticmethod
+    def _parse(spec):
+        """Parse alias declaration/definition ``spec``
+
+        This allows symbol names to use also ``$`` as an initial letter
+        (for backward compatibility), and callers of this function should
+        examine whether ``$`` is used also for unexpected symbols or not.
+        """
+        return _parsewith(spec, syminitletters=_aliassyminitletters)
+
+    @staticmethod
+    def _trygetfunc(tree):
+        if tree[0] == 'func' and tree[1][0] == 'symbol':
+            return tree[1][1], getlist(tree[2])
+
+def expandaliases(ui, tree):
+    aliases = _aliasrules.buildmap(ui.configitems('revsetalias'))
+    tree = _aliasrules.expand(aliases, tree)
+    # warn about problematic (but not referred) aliases
+    for name, alias in sorted(aliases.iteritems()):
+        if alias.error and not alias.warned:
+            ui.warn(_('warning: %s\n') % (alias.error))
+            alias.warned = True
+    return tree
+
+def foldconcat(tree):
+    """Fold elements to be concatenated by `##`
+    """
+    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
+        return tree
+    if tree[0] == '_concat':
+        pending = [tree]
+        l = []
+        while pending:
+            e = pending.pop()
+            if e[0] == '_concat':
+                pending.extend(reversed(e[1:]))
+            elif e[0] in ('string', 'symbol'):
+                l.append(e[1])
+            else:
+                msg = _("\"##\" can't concatenate \"%s\" element") % (e[0])
+                raise error.ParseError(msg)
+        return ('string', ''.join(l))
+    else:
+        return tuple(foldconcat(t) for t in tree)
+
+def parse(spec, lookup=None):
+    return _parsewith(spec, lookup=lookup)
+
+def formatspec(expr, *args):
+    '''
+    This is a convenience function for using revsets internally, and
+    escapes arguments appropriately. Aliases are intentionally ignored
+    so that intended expression behavior isn't accidentally subverted.
+
+    Supported arguments:
+
+    %r = revset expression, parenthesized
+    %d = int(arg), no quoting
+    %s = string(arg), escaped and single-quoted
+    %b = arg.branch(), escaped and single-quoted
+    %n = hex(arg), single-quoted
+    %% = a literal '%'
+
+    Prefixing the type with 'l' specifies a parenthesized list of that type.
+
+    >>> formatspec('%r:: and %lr', '10 or 11', ("this()", "that()"))
+    '(10 or 11):: and ((this()) or (that()))'
+    >>> formatspec('%d:: and not %d::', 10, 20)
+    '10:: and not 20::'
+    >>> formatspec('%ld or %ld', [], [1])
+    "_list('') or 1"
+    >>> formatspec('keyword(%s)', 'foo\\xe9')
+    "keyword('foo\\\\xe9')"
+    >>> b = lambda: 'default'
+    >>> b.branch = b
+    >>> formatspec('branch(%b)', b)
+    "branch('default')"
+    >>> formatspec('root(%ls)', ['a', 'b', 'c', 'd'])
+    "root(_list('a\\x00b\\x00c\\x00d'))"
+    '''
+
+    def quote(s):
+        return repr(str(s))
+
+    def argtype(c, arg):
+        if c == 'd':
+            return str(int(arg))
+        elif c == 's':
+            return quote(arg)
+        elif c == 'r':
+            parse(arg) # make sure syntax errors are confined
+            return '(%s)' % arg
+        elif c == 'n':
+            return quote(node.hex(arg))
+        elif c == 'b':
+            return quote(arg.branch())
+
+    def listexp(s, t):
+        l = len(s)
+        if l == 0:
+            return "_list('')"
+        elif l == 1:
+            return argtype(t, s[0])
+        elif t == 'd':
+            return "_intlist('%s')" % "\0".join(str(int(a)) for a in s)
+        elif t == 's':
+            return "_list('%s')" % "\0".join(s)
+        elif t == 'n':
+            return "_hexlist('%s')" % "\0".join(node.hex(a) for a in s)
+        elif t == 'b':
+            return "_list('%s')" % "\0".join(a.branch() for a in s)
+
+        m = l // 2
+        return '(%s or %s)' % (listexp(s[:m], t), listexp(s[m:], t))
+
+    ret = ''
+    pos = 0
+    arg = 0
+    while pos < len(expr):
+        c = expr[pos]
+        if c == '%':
+            pos += 1
+            d = expr[pos]
+            if d == '%':
+                ret += d
+            elif d in 'dsnbr':
+                ret += argtype(d, args[arg])
+                arg += 1
+            elif d == 'l':
+                # a list of some type
+                pos += 1
+                d = expr[pos]
+                ret += listexp(list(args[arg]), d)
+                arg += 1
+            else:
+                raise error.Abort(_('unexpected revspec format character %s')
+                                  % d)
+        else:
+            ret += c
+        pos += 1
+
+    return ret
+
+def prettyformat(tree):
+    return parser.prettyformat(tree, ('string', 'symbol'))
+
+def depth(tree):
+    if isinstance(tree, tuple):
+        return max(map(depth, tree)) + 1
+    else:
+        return 0
+
+def funcsused(tree):
+    if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
+        return set()
+    else:
+        funcs = set()
+        for s in tree[1:]:
+            funcs |= funcsused(s)
+        if tree[0] == 'func':
+            funcs.add(tree[1][1])
+        return funcs
--- a/mercurial/scmutil.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/mercurial/scmutil.py	Sun Feb 19 18:19:33 2017 +0900
@@ -30,6 +30,7 @@
     phases,
     pycompat,
     revset,
+    revsetlang,
     similar,
     util,
 )
@@ -890,7 +891,7 @@
     return repo[l.last()]
 
 def _pairspec(revspec):
-    tree = revset.parse(revspec)
+    tree = revsetlang.parse(revspec)
     return tree and tree[0] in ('range', 'rangepre', 'rangepost', 'rangeall')
 
 def revpair(repo, revs):
@@ -936,7 +937,7 @@
     revision numbers.
 
     It is assumed the revsets are already formatted. If you have arguments
-    that need to be expanded in the revset, call ``revset.formatspec()``
+    that need to be expanded in the revset, call ``revsetlang.formatspec()``
     and pass the result as an element of ``specs``.
 
     Specifying a single revset is allowed.
@@ -947,7 +948,7 @@
     allspecs = []
     for spec in specs:
         if isinstance(spec, int):
-            spec = revset.formatspec('rev(%d)', spec)
+            spec = revsetlang.formatspec('rev(%d)', spec)
         allspecs.append(spec)
     m = revset.matchany(repo.ui, allspecs, repo)
     return m(repo)
--- a/mercurial/templater.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/mercurial/templater.py	Sun Feb 19 18:19:33 2017 +0900
@@ -20,6 +20,7 @@
     pycompat,
     registrar,
     revset as revsetmod,
+    revsetlang,
     templatefilters,
     templatekw,
     util,
@@ -778,7 +779,7 @@
 
     if len(args) > 1:
         formatargs = [evalfuncarg(context, mapping, a) for a in args[1:]]
-        revs = query(revsetmod.formatspec(raw, *formatargs))
+        revs = query(revsetlang.formatspec(raw, *formatargs))
         revs = list(revs)
     else:
         revsetcache = mapping['cache'].setdefault("revsetcache", {})
--- a/tests/test-doctest.py	Sun Feb 19 18:16:09 2017 +0900
+++ b/tests/test-doctest.py	Sun Feb 19 18:19:33 2017 +0900
@@ -28,7 +28,7 @@
 testmod('mercurial.patch')
 testmod('mercurial.pathutil')
 testmod('mercurial.parser')
-testmod('mercurial.revset')
+testmod('mercurial.revsetlang')
 testmod('mercurial.smartset')
 testmod('mercurial.store')
 testmod('mercurial.subrepo')
--- a/tests/test-glog.t	Sun Feb 19 18:16:09 2017 +0900
+++ b/tests/test-glog.t	Sun Feb 19 18:19:33 2017 +0900
@@ -82,18 +82,18 @@
   > }
 
   $ cat > printrevset.py <<EOF
-  > from mercurial import extensions, revset, commands, cmdutil
+  > from mercurial import extensions, revsetlang, commands, cmdutil
   > 
   > def uisetup(ui):
   >     def printrevset(orig, ui, repo, *pats, **opts):
   >         if opts.get('print_revset'):
   >             expr = cmdutil.getgraphlogrevs(repo, pats, opts)[1]
   >             if expr:
-  >                 tree = revset.parse(expr)
+  >                 tree = revsetlang.parse(expr)
   >             else:
   >                 tree = []
   >             ui.write('%r\n' % (opts.get('rev', []),))
-  >             ui.write(revset.prettyformat(tree) + '\n')
+  >             ui.write(revsetlang.prettyformat(tree) + '\n')
   >             return 0
   >         return orig(ui, repo, *pats, **opts)
   >     entry = extensions.wrapcommand(commands.table, 'log', printrevset)
--- a/tests/test-revset.t	Sun Feb 19 18:16:09 2017 +0900
+++ b/tests/test-revset.t	Sun Feb 19 18:19:33 2017 +0900
@@ -40,6 +40,7 @@
   >     cmdutil,
   >     node as nodemod,
   >     revset,
+  >     revsetlang,
   >     smartset,
   > )
   > cmdtable = {}
@@ -50,13 +51,14 @@
   > def debugrevlistspec(ui, repo, fmt, *args, **opts):
   >     if opts['bin']:
   >         args = map(nodemod.bin, args)
-  >     expr = revset.formatspec(fmt, list(args))
+  >     expr = revsetlang.formatspec(fmt, list(args))
   >     if ui.verbose:
-  >         tree = revset.parse(expr, lookup=repo.__contains__)
-  >         ui.note(revset.prettyformat(tree), "\n")
+  >         tree = revsetlang.parse(expr, lookup=repo.__contains__)
+  >         ui.note(revsetlang.prettyformat(tree), "\n")
   >         if opts["optimize"]:
-  >             opttree = revset.optimize(revset.analyze(tree))
-  >             ui.note("* optimized:\n", revset.prettyformat(opttree), "\n")
+  >             opttree = revsetlang.optimize(revsetlang.analyze(tree))
+  >             ui.note("* optimized:\n", revsetlang.prettyformat(opttree),
+  >                     "\n")
   >     func = revset.match(ui, expr, repo)
   >     revs = func(repo)
   >     if ui.verbose: