mercurial/revsetlang.py
changeset 43076 2372284d9457
parent 41835 ddb174511f1b
child 43077 687b865b95ad
equal deleted inserted replaced
43075:57875cf423c9 43076:2372284d9457
    16     parser,
    16     parser,
    17     pycompat,
    17     pycompat,
    18     smartset,
    18     smartset,
    19     util,
    19     util,
    20 )
    20 )
    21 from .utils import (
    21 from .utils import stringutil
    22     stringutil,
       
    23 )
       
    24 
    22 
    25 elements = {
    23 elements = {
    26     # token-type: binding-strength, primary, prefix, infix, suffix
    24     # token-type: binding-strength, primary, prefix, infix, suffix
    27     "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    25     "(": (21, None, ("group", 1, ")"), ("func", 1, ")"), None),
    28     "[": (21, None, None, ("subscript", 1, "]"), None),
    26     "[": (21, None, None, ("subscript", 1, "]"), None),
    29     "#": (21, None, None, ("relation", 21), None),
    27     "#": (21, None, None, ("relation", 21), None),
    30     "##": (20, None, None, ("_concat", 20), None),
    28     "##": (20, None, None, ("_concat", 20), None),
    31     "~": (18, None, None, ("ancestor", 18), None),
    29     "~": (18, None, None, ("ancestor", 18), None),
    32     "^": (18, None, None, ("parent", 18), "parentpost"),
    30     "^": (18, None, None, ("parent", 18), "parentpost"),
    33     "-": (5, None, ("negate", 19), ("minus", 5), None),
    31     "-": (5, None, ("negate", 19), ("minus", 5), None),
    34     "::": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
    32     "::": (
    35            "dagrangepost"),
    33         17,
    36     "..": (17, "dagrangeall", ("dagrangepre", 17), ("dagrange", 17),
    34         "dagrangeall",
    37            "dagrangepost"),
    35         ("dagrangepre", 17),
       
    36         ("dagrange", 17),
       
    37         "dagrangepost",
       
    38     ),
       
    39     "..": (
       
    40         17,
       
    41         "dagrangeall",
       
    42         ("dagrangepre", 17),
       
    43         ("dagrange", 17),
       
    44         "dagrangepost",
       
    45     ),
    38     ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    46     ":": (15, "rangeall", ("rangepre", 15), ("range", 15), "rangepost"),
    39     "not": (10, None, ("not", 10), None, None),
    47     "not": (10, None, ("not", 10), None, None),
    40     "!": (10, None, ("not", 10), None, None),
    48     "!": (10, None, ("not", 10), None, None),
    41     "and": (5, None, None, ("and", 5), None),
    49     "and": (5, None, None, ("and", 5), None),
    42     "&": (5, None, None, ("and", 5), None),
    50     "&": (5, None, None, ("and", 5), None),
    59 
    67 
    60 _quoteletters = {'"', "'"}
    68 _quoteletters = {'"', "'"}
    61 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
    69 _simpleopletters = set(pycompat.iterbytestr("()[]#:=,-|&+!~^%"))
    62 
    70 
    63 # default set of valid characters for the initial letter of symbols
    71 # default set of valid characters for the initial letter of symbols
    64 _syminitletters = set(pycompat.iterbytestr(
    72 _syminitletters = set(
    65     pycompat.sysbytes(string.ascii_letters) +
    73     pycompat.iterbytestr(
    66     pycompat.sysbytes(string.digits) +
    74         pycompat.sysbytes(string.ascii_letters)
    67     '._@')) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
    75         + pycompat.sysbytes(string.digits)
       
    76         + '._@'
       
    77     )
       
    78 ) | set(map(pycompat.bytechr, pycompat.xrange(128, 256)))
    68 
    79 
    69 # default set of valid characters for non-initial letters of symbols
    80 # default set of valid characters for non-initial letters of symbols
    70 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
    81 _symletters = _syminitletters | set(pycompat.iterbytestr('-/'))
       
    82 
    71 
    83 
    72 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    84 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
    73     '''
    85     '''
    74     Parse a revset statement into a stream of tokens
    86     Parse a revset statement into a stream of tokens
    75 
    87 
    89     >>> list(tokenize(b"@::"))
   101     >>> list(tokenize(b"@::"))
    90     [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
   102     [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
    91 
   103 
    92     '''
   104     '''
    93     if not isinstance(program, bytes):
   105     if not isinstance(program, bytes):
    94         raise error.ProgrammingError('revset statement must be bytes, got %r'
   106         raise error.ProgrammingError(
    95                                      % program)
   107             'revset statement must be bytes, got %r' % program
       
   108         )
    96     program = pycompat.bytestr(program)
   109     program = pycompat.bytestr(program)
    97     if syminitletters is None:
   110     if syminitletters is None:
    98         syminitletters = _syminitletters
   111         syminitletters = _syminitletters
    99     if symletters is None:
   112     if symletters is None:
   100         symletters = _symletters
   113         symletters = _symletters
   115             return
   128             return
   116 
   129 
   117     pos, l = 0, len(program)
   130     pos, l = 0, len(program)
   118     while pos < l:
   131     while pos < l:
   119         c = program[pos]
   132         c = program[pos]
   120         if c.isspace(): # skip inter-token whitespace
   133         if c.isspace():  # skip inter-token whitespace
   121             pass
   134             pass
   122         elif c == ':' and program[pos:pos + 2] == '::': # look ahead carefully
   135         elif (
       
   136             c == ':' and program[pos : pos + 2] == '::'
       
   137         ):  # look ahead carefully
   123             yield ('::', None, pos)
   138             yield ('::', None, pos)
   124             pos += 1 # skip ahead
   139             pos += 1  # skip ahead
   125         elif c == '.' and program[pos:pos + 2] == '..': # look ahead carefully
   140         elif (
       
   141             c == '.' and program[pos : pos + 2] == '..'
       
   142         ):  # look ahead carefully
   126             yield ('..', None, pos)
   143             yield ('..', None, pos)
   127             pos += 1 # skip ahead
   144             pos += 1  # skip ahead
   128         elif c == '#' and program[pos:pos + 2] == '##': # look ahead carefully
   145         elif (
       
   146             c == '#' and program[pos : pos + 2] == '##'
       
   147         ):  # look ahead carefully
   129             yield ('##', None, pos)
   148             yield ('##', None, pos)
   130             pos += 1 # skip ahead
   149             pos += 1  # skip ahead
   131         elif c in _simpleopletters: # handle simple operators
   150         elif c in _simpleopletters:  # handle simple operators
   132             yield (c, None, pos)
   151             yield (c, None, pos)
   133         elif (c in _quoteletters or c == 'r' and
   152         elif (
   134               program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
   153             c in _quoteletters
       
   154             or c == 'r'
       
   155             and program[pos : pos + 2] in ("r'", 'r"')
       
   156         ):  # handle quoted strings
   135             if c == 'r':
   157             if c == 'r':
   136                 pos += 1
   158                 pos += 1
   137                 c = program[pos]
   159                 c = program[pos]
   138                 decode = lambda x: x
   160                 decode = lambda x: x
   139             else:
   161             else:
   140                 decode = parser.unescapestr
   162                 decode = parser.unescapestr
   141             pos += 1
   163             pos += 1
   142             s = pos
   164             s = pos
   143             while pos < l: # find closing quote
   165             while pos < l:  # find closing quote
   144                 d = program[pos]
   166                 d = program[pos]
   145                 if d == '\\': # skip over escaped characters
   167                 if d == '\\':  # skip over escaped characters
   146                     pos += 2
   168                     pos += 2
   147                     continue
   169                     continue
   148                 if d == c:
   170                 if d == c:
   149                     yield ('string', decode(program[s:pos]), s)
   171                     yield ('string', decode(program[s:pos]), s)
   150                     break
   172                     break
   153                 raise error.ParseError(_("unterminated string"), s)
   175                 raise error.ParseError(_("unterminated string"), s)
   154         # gather up a symbol/keyword
   176         # gather up a symbol/keyword
   155         elif c in syminitletters:
   177         elif c in syminitletters:
   156             s = pos
   178             s = pos
   157             pos += 1
   179             pos += 1
   158             while pos < l: # find end of symbol
   180             while pos < l:  # find end of symbol
   159                 d = program[pos]
   181                 d = program[pos]
   160                 if d not in symletters:
   182                 if d not in symletters:
   161                     break
   183                     break
   162                 if d == '.' and program[pos - 1] == '.': # special case for ..
   184                 if d == '.' and program[pos - 1] == '.':  # special case for ..
   163                     pos -= 1
   185                     pos -= 1
   164                     break
   186                     break
   165                 pos += 1
   187                 pos += 1
   166             sym = program[s:pos]
   188             sym = program[s:pos]
   167             if sym in keywords: # operator keywords
   189             if sym in keywords:  # operator keywords
   168                 yield (sym, None, s)
   190                 yield (sym, None, s)
   169             elif '-' in sym:
   191             elif '-' in sym:
   170                 # some jerk gave us foo-bar-baz, try to check if it's a symbol
   192                 # some jerk gave us foo-bar-baz, try to check if it's a symbol
   171                 if lookup and lookup(sym):
   193                 if lookup and lookup(sym):
   172                     # looks like a real symbol
   194                     # looks like a real symbol
   173                     yield ('symbol', sym, s)
   195                     yield ('symbol', sym, s)
   174                 else:
   196                 else:
   175                     # looks like an expression
   197                     # looks like an expression
   176                     parts = sym.split('-')
   198                     parts = sym.split('-')
   177                     for p in parts[:-1]:
   199                     for p in parts[:-1]:
   178                         if p: # possible consecutive -
   200                         if p:  # possible consecutive -
   179                             yield ('symbol', p, s)
   201                             yield ('symbol', p, s)
   180                         s += len(p)
   202                         s += len(p)
   181                         yield ('-', None, s)
   203                         yield ('-', None, s)
   182                         s += 1
   204                         s += 1
   183                     if parts[-1]: # possible trailing -
   205                     if parts[-1]:  # possible trailing -
   184                         yield ('symbol', parts[-1], s)
   206                         yield ('symbol', parts[-1], s)
   185             else:
   207             else:
   186                 yield ('symbol', sym, s)
   208                 yield ('symbol', sym, s)
   187             pos -= 1
   209             pos -= 1
   188         else:
   210         else:
   189             raise error.ParseError(_("syntax error in revset '%s'") %
   211             raise error.ParseError(
   190                                    program, pos)
   212                 _("syntax error in revset '%s'") % program, pos
       
   213             )
   191         pos += 1
   214         pos += 1
   192     yield ('end', None, pos)
   215     yield ('end', None, pos)
   193 
   216 
       
   217 
   194 # helpers
   218 # helpers
   195 
   219 
   196 _notset = object()
   220 _notset = object()
       
   221 
   197 
   222 
   198 def getsymbol(x):
   223 def getsymbol(x):
   199     if x and x[0] == 'symbol':
   224     if x and x[0] == 'symbol':
   200         return x[1]
   225         return x[1]
   201     raise error.ParseError(_('not a symbol'))
   226     raise error.ParseError(_('not a symbol'))
   202 
   227 
       
   228 
   203 def getstring(x, err):
   229 def getstring(x, err):
   204     if x and (x[0] == 'string' or x[0] == 'symbol'):
   230     if x and (x[0] == 'string' or x[0] == 'symbol'):
   205         return x[1]
   231         return x[1]
   206     raise error.ParseError(err)
   232     raise error.ParseError(err)
       
   233 
   207 
   234 
   208 def getinteger(x, err, default=_notset):
   235 def getinteger(x, err, default=_notset):
   209     if not x and default is not _notset:
   236     if not x and default is not _notset:
   210         return default
   237         return default
   211     try:
   238     try:
   212         return int(getstring(x, err))
   239         return int(getstring(x, err))
   213     except ValueError:
   240     except ValueError:
   214         raise error.ParseError(err)
   241         raise error.ParseError(err)
   215 
   242 
       
   243 
   216 def getboolean(x, err):
   244 def getboolean(x, err):
   217     value = stringutil.parsebool(getsymbol(x))
   245     value = stringutil.parsebool(getsymbol(x))
   218     if value is not None:
   246     if value is not None:
   219         return value
   247         return value
   220     raise error.ParseError(err)
   248     raise error.ParseError(err)
       
   249 
   221 
   250 
   222 def getlist(x):
   251 def getlist(x):
   223     if not x:
   252     if not x:
   224         return []
   253         return []
   225     if x[0] == 'list':
   254     if x[0] == 'list':
   226         return list(x[1:])
   255         return list(x[1:])
   227     return [x]
   256     return [x]
       
   257 
   228 
   258 
   229 def getrange(x, err):
   259 def getrange(x, err):
   230     if not x:
   260     if not x:
   231         raise error.ParseError(err)
   261         raise error.ParseError(err)
   232     op = x[0]
   262     op = x[0]
   238         return x[1], None
   268         return x[1], None
   239     elif op == 'rangeall':
   269     elif op == 'rangeall':
   240         return None, None
   270         return None, None
   241     raise error.ParseError(err)
   271     raise error.ParseError(err)
   242 
   272 
       
   273 
   243 def getintrange(x, err1, err2, deffirst=_notset, deflast=_notset):
   274 def getintrange(x, err1, err2, deffirst=_notset, deflast=_notset):
   244     """Get [first, last] integer range (both inclusive) from a parsed tree
   275     """Get [first, last] integer range (both inclusive) from a parsed tree
   245 
   276 
   246     If any of the sides omitted, and if no default provided, ParseError will
   277     If any of the sides omitted, and if no default provided, ParseError will
   247     be raised.
   278     be raised.
   250         n = getinteger(x, err1)
   281         n = getinteger(x, err1)
   251         return n, n
   282         return n, n
   252     a, b = getrange(x, err1)
   283     a, b = getrange(x, err1)
   253     return getinteger(a, err2, deffirst), getinteger(b, err2, deflast)
   284     return getinteger(a, err2, deffirst), getinteger(b, err2, deflast)
   254 
   285 
       
   286 
   255 def getargs(x, min, max, err):
   287 def getargs(x, min, max, err):
   256     l = getlist(x)
   288     l = getlist(x)
   257     if len(l) < min or (max >= 0 and len(l) > max):
   289     if len(l) < min or (max >= 0 and len(l) > max):
   258         raise error.ParseError(err)
   290         raise error.ParseError(err)
   259     return l
   291     return l
   260 
   292 
       
   293 
   261 def getargsdict(x, funcname, keys):
   294 def getargsdict(x, funcname, keys):
   262     return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
   295     return parser.buildargsdict(
   263                                 keyvaluenode='keyvalue', keynode='symbol')
   296         getlist(x),
       
   297         funcname,
       
   298         parser.splitargspec(keys),
       
   299         keyvaluenode='keyvalue',
       
   300         keynode='symbol',
       
   301     )
       
   302 
   264 
   303 
   265 # cache of {spec: raw parsed tree} built internally
   304 # cache of {spec: raw parsed tree} built internally
   266 _treecache = {}
   305 _treecache = {}
       
   306 
   267 
   307 
   268 def _cachedtree(spec):
   308 def _cachedtree(spec):
   269     # thread safe because parse() is reentrant and dict.__setitem__() is atomic
   309     # thread safe because parse() is reentrant and dict.__setitem__() is atomic
   270     tree = _treecache.get(spec)
   310     tree = _treecache.get(spec)
   271     if tree is None:
   311     if tree is None:
   272         _treecache[spec] = tree = parse(spec)
   312         _treecache[spec] = tree = parse(spec)
   273     return tree
   313     return tree
   274 
   314 
       
   315 
   275 def _build(tmplspec, *repls):
   316 def _build(tmplspec, *repls):
   276     """Create raw parsed tree from a template revset statement
   317     """Create raw parsed tree from a template revset statement
   277 
   318 
   278     >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
   319     >>> _build(b'f(_) and _', (b'string', b'1'), (b'symbol', b'2'))
   279     ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
   320     ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
   280     """
   321     """
   281     template = _cachedtree(tmplspec)
   322     template = _cachedtree(tmplspec)
   282     return parser.buildtree(template, ('symbol', '_'), *repls)
   323     return parser.buildtree(template, ('symbol', '_'), *repls)
       
   324 
   283 
   325 
   284 def _match(patspec, tree):
   326 def _match(patspec, tree):
   285     """Test if a tree matches the given pattern statement; return the matches
   327     """Test if a tree matches the given pattern statement; return the matches
   286 
   328 
   287     >>> _match(b'f(_)', parse(b'f()'))
   329     >>> _match(b'f(_)', parse(b'f()'))
   288     >>> _match(b'f(_)', parse(b'f(1)'))
   330     >>> _match(b'f(_)', parse(b'f(1)'))
   289     [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
   331     [('func', ('symbol', 'f'), ('symbol', '1')), ('symbol', '1')]
   290     >>> _match(b'f(_)', parse(b'f(1, 2)'))
   332     >>> _match(b'f(_)', parse(b'f(1, 2)'))
   291     """
   333     """
   292     pattern = _cachedtree(patspec)
   334     pattern = _cachedtree(patspec)
   293     return parser.matchtree(pattern, tree, ('symbol', '_'),
   335     return parser.matchtree(
   294                             {'keyvalue', 'list'})
   336         pattern, tree, ('symbol', '_'), {'keyvalue', 'list'}
       
   337     )
       
   338 
   295 
   339 
   296 def _matchonly(revs, bases):
   340 def _matchonly(revs, bases):
   297     return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
   341     return _match('ancestors(_) and not ancestors(_)', ('and', revs, bases))
       
   342 
   298 
   343 
   299 def _fixops(x):
   344 def _fixops(x):
   300     """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
   345     """Rewrite raw parsed tree to resolve ambiguous syntax which cannot be
   301     handled well by our simple top-down parser"""
   346     handled well by our simple top-down parser"""
   302     if not isinstance(x, tuple):
   347     if not isinstance(x, tuple):
   322     elif op == 'subscript' and x[1][0] == 'relation':
   367     elif op == 'subscript' and x[1][0] == 'relation':
   323         # x#y[z] ternary
   368         # x#y[z] ternary
   324         return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
   369         return _fixops(('relsubscript', x[1][1], x[1][2], x[2]))
   325 
   370 
   326     return (op,) + tuple(_fixops(y) for y in x[1:])
   371     return (op,) + tuple(_fixops(y) for y in x[1:])
       
   372 
   327 
   373 
   328 def _analyze(x):
   374 def _analyze(x):
   329     if x is None:
   375     if x is None:
   330         return x
   376         return x
   331 
   377 
   351         return (op, None)
   397         return (op, None)
   352     elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
   398     elif op in {'or', 'not', 'rangepre', 'rangepost', 'parentpost'}:
   353         return (op, _analyze(x[1]))
   399         return (op, _analyze(x[1]))
   354     elif op == 'group':
   400     elif op == 'group':
   355         return _analyze(x[1])
   401         return _analyze(x[1])
   356     elif op in {'and', 'dagrange', 'range', 'parent', 'ancestor', 'relation',
   402     elif op in {
   357                 'subscript'}:
   403         'and',
       
   404         'dagrange',
       
   405         'range',
       
   406         'parent',
       
   407         'ancestor',
       
   408         'relation',
       
   409         'subscript',
       
   410     }:
   358         ta = _analyze(x[1])
   411         ta = _analyze(x[1])
   359         tb = _analyze(x[2])
   412         tb = _analyze(x[2])
   360         return (op, ta, tb)
   413         return (op, ta, tb)
   361     elif op == 'relsubscript':
   414     elif op == 'relsubscript':
   362         ta = _analyze(x[1])
   415         ta = _analyze(x[1])
   369         return (op, x[1], _analyze(x[2]))
   422         return (op, x[1], _analyze(x[2]))
   370     elif op == 'func':
   423     elif op == 'func':
   371         return (op, x[1], _analyze(x[2]))
   424         return (op, x[1], _analyze(x[2]))
   372     raise ValueError('invalid operator %r' % op)
   425     raise ValueError('invalid operator %r' % op)
   373 
   426 
       
   427 
   374 def analyze(x):
   428 def analyze(x):
   375     """Transform raw parsed tree to evaluatable tree which can be fed to
   429     """Transform raw parsed tree to evaluatable tree which can be fed to
   376     optimize() or getset()
   430     optimize() or getset()
   377 
   431 
   378     All pseudo operations should be mapped to real operations or functions
   432     All pseudo operations should be mapped to real operations or functions
   379     defined in methods or symbols table respectively.
   433     defined in methods or symbols table respectively.
   380     """
   434     """
   381     return _analyze(x)
   435     return _analyze(x)
   382 
   436 
       
   437 
   383 def _optimize(x):
   438 def _optimize(x):
   384     if x is None:
   439     if x is None:
   385         return 0, x
   440         return 0, x
   386 
   441 
   387     op = x[0]
   442     op = x[0]
   388     if op in ('string', 'symbol', 'smartset'):
   443     if op in ('string', 'symbol', 'smartset'):
   389         return 0.5, x # single revisions are small
   444         return 0.5, x  # single revisions are small
   390     elif op == 'and':
   445     elif op == 'and':
   391         wa, ta = _optimize(x[1])
   446         wa, ta = _optimize(x[1])
   392         wb, tb = _optimize(x[2])
   447         wb, tb = _optimize(x[2])
   393         w = min(wa, wb)
   448         w = min(wa, wb)
   394 
   449 
   410         return w, (op, ta, tb)
   465         return w, (op, ta, tb)
   411     elif op == 'or':
   466     elif op == 'or':
   412         # fast path for machine-generated expression, that is likely to have
   467         # fast path for machine-generated expression, that is likely to have
   413         # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
   468         # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()'
   414         ws, ts, ss = [], [], []
   469         ws, ts, ss = [], [], []
       
   470 
   415         def flushss():
   471         def flushss():
   416             if not ss:
   472             if not ss:
   417                 return
   473                 return
   418             if len(ss) == 1:
   474             if len(ss) == 1:
   419                 w, t = ss[0]
   475                 w, t = ss[0]
   422                 y = _build('_list(_)', ('string', s))
   478                 y = _build('_list(_)', ('string', s))
   423                 w, t = _optimize(y)
   479                 w, t = _optimize(y)
   424             ws.append(w)
   480             ws.append(w)
   425             ts.append(t)
   481             ts.append(t)
   426             del ss[:]
   482             del ss[:]
       
   483 
   427         for y in getlist(x[1]):
   484         for y in getlist(x[1]):
   428             w, t = _optimize(y)
   485             w, t = _optimize(y)
   429             if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
   486             if t is not None and (t[0] == 'string' or t[0] == 'symbol'):
   430                 ss.append((w, t))
   487                 ss.append((w, t))
   431                 continue
   488                 continue
   432             flushss()
   489             flushss()
   433             ws.append(w)
   490             ws.append(w)
   434             ts.append(t)
   491             ts.append(t)
   435         flushss()
   492         flushss()
   436         if len(ts) == 1:
   493         if len(ts) == 1:
   437             return ws[0], ts[0] # 'or' operation is fully optimized out
   494             return ws[0], ts[0]  # 'or' operation is fully optimized out
   438         return max(ws), (op, ('list',) + tuple(ts))
   495         return max(ws), (op, ('list',) + tuple(ts))
   439     elif op == 'not':
   496     elif op == 'not':
   440         # Optimize not public() to _notpublic() because we have a fast version
   497         # Optimize not public() to _notpublic() because we have a fast version
   441         if _match('public()', x[1]):
   498         if _match('public()', x[1]):
   442             o = _optimize(_build('_notpublic()'))
   499             o = _optimize(_build('_notpublic()'))
   476             return w + wa, _build('_commonancestorheads(_)', m[1])
   533             return w + wa, _build('_commonancestorheads(_)', m[1])
   477 
   534 
   478         return w + wa, (op, x[1], ta)
   535         return w + wa, (op, x[1], ta)
   479     raise ValueError('invalid operator %r' % op)
   536     raise ValueError('invalid operator %r' % op)
   480 
   537 
       
   538 
   481 def optimize(tree):
   539 def optimize(tree):
   482     """Optimize evaluatable tree
   540     """Optimize evaluatable tree
   483 
   541 
   484     All pseudo operations should be transformed beforehand.
   542     All pseudo operations should be transformed beforehand.
   485     """
   543     """
   486     _weight, newtree = _optimize(tree)
   544     _weight, newtree = _optimize(tree)
   487     return newtree
   545     return newtree
   488 
   546 
       
   547 
   489 # the set of valid characters for the initial letter of symbols in
   548 # the set of valid characters for the initial letter of symbols in
   490 # alias declarations and definitions
   549 # alias declarations and definitions
   491 _aliassyminitletters = _syminitletters | {'$'}
   550 _aliassyminitletters = _syminitletters | {'$'}
       
   551 
   492 
   552 
   493 def _parsewith(spec, lookup=None, syminitletters=None):
   553 def _parsewith(spec, lookup=None, syminitletters=None):
   494     """Generate a parse tree of given spec with given tokenizing options
   554     """Generate a parse tree of given spec with given tokenizing options
   495 
   555 
   496     >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
   556     >>> _parsewith(b'foo($1)', syminitletters=_aliassyminitletters)
   505     ParseError: ('invalid token', 4)
   565     ParseError: ('invalid token', 4)
   506     """
   566     """
   507     if lookup and spec.startswith('revset(') and spec.endswith(')'):
   567     if lookup and spec.startswith('revset(') and spec.endswith(')'):
   508         lookup = None
   568         lookup = None
   509     p = parser.parser(elements)
   569     p = parser.parser(elements)
   510     tree, pos = p.parse(tokenize(spec, lookup=lookup,
   570     tree, pos = p.parse(
   511                                  syminitletters=syminitletters))
   571         tokenize(spec, lookup=lookup, syminitletters=syminitletters)
       
   572     )
   512     if pos != len(spec):
   573     if pos != len(spec):
   513         raise error.ParseError(_('invalid token'), pos)
   574         raise error.ParseError(_('invalid token'), pos)
   514     return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
   575     return _fixops(parser.simplifyinfixops(tree, ('list', 'or')))
   515 
   576 
       
   577 
   516 class _aliasrules(parser.basealiasrules):
   578 class _aliasrules(parser.basealiasrules):
   517     """Parsing and expansion rule set of revset aliases"""
   579     """Parsing and expansion rule set of revset aliases"""
       
   580 
   518     _section = _('revset alias')
   581     _section = _('revset alias')
   519 
   582 
   520     @staticmethod
   583     @staticmethod
   521     def _parse(spec):
   584     def _parse(spec):
   522         """Parse alias declaration/definition ``spec``
   585         """Parse alias declaration/definition ``spec``
   529 
   592 
   530     @staticmethod
   593     @staticmethod
   531     def _trygetfunc(tree):
   594     def _trygetfunc(tree):
   532         if tree[0] == 'func' and tree[1][0] == 'symbol':
   595         if tree[0] == 'func' and tree[1][0] == 'symbol':
   533             return tree[1][1], getlist(tree[2])
   596             return tree[1][1], getlist(tree[2])
       
   597 
   534 
   598 
   535 def expandaliases(tree, aliases, warn=None):
   599 def expandaliases(tree, aliases, warn=None):
   536     """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
   600     """Expand aliases in a tree, aliases is a list of (name, value) tuples"""
   537     aliases = _aliasrules.buildmap(aliases)
   601     aliases = _aliasrules.buildmap(aliases)
   538     tree = _aliasrules.expand(aliases, tree)
   602     tree = _aliasrules.expand(aliases, tree)
   542             if alias.error and not alias.warned:
   606             if alias.error and not alias.warned:
   543                 warn(_('warning: %s\n') % (alias.error))
   607                 warn(_('warning: %s\n') % (alias.error))
   544                 alias.warned = True
   608                 alias.warned = True
   545     return tree
   609     return tree
   546 
   610 
       
   611 
   547 def foldconcat(tree):
   612 def foldconcat(tree):
   548     """Fold elements to be concatenated by `##`
   613     """Fold elements to be concatenated by `##`
   549     """
   614     """
   550     if (not isinstance(tree, tuple)
   615     if not isinstance(tree, tuple) or tree[0] in (
   551         or tree[0] in ('string', 'symbol', 'smartset')):
   616         'string',
       
   617         'symbol',
       
   618         'smartset',
       
   619     ):
   552         return tree
   620         return tree
   553     if tree[0] == '_concat':
   621     if tree[0] == '_concat':
   554         pending = [tree]
   622         pending = [tree]
   555         l = []
   623         l = []
   556         while pending:
   624         while pending:
   564                 raise error.ParseError(msg)
   632                 raise error.ParseError(msg)
   565         return ('string', ''.join(l))
   633         return ('string', ''.join(l))
   566     else:
   634     else:
   567         return tuple(foldconcat(t) for t in tree)
   635         return tuple(foldconcat(t) for t in tree)
   568 
   636 
       
   637 
   569 def parse(spec, lookup=None):
   638 def parse(spec, lookup=None):
   570     try:
   639     try:
   571         return _parsewith(spec, lookup=lookup)
   640         return _parsewith(spec, lookup=lookup)
   572     except error.ParseError as inst:
   641     except error.ParseError as inst:
   573         if len(inst.args) > 1:  # has location
   642         if len(inst.args) > 1:  # has location
   579             # start. Therefore, we print "loc + 1" spaces (instead of "loc")
   648             # start. Therefore, we print "loc + 1" spaces (instead of "loc")
   580             # to line up the caret with the location of the error.
   649             # to line up the caret with the location of the error.
   581             inst.hint = spec + '\n' + ' ' * (loc + 1) + '^ ' + _('here')
   650             inst.hint = spec + '\n' + ' ' * (loc + 1) + '^ ' + _('here')
   582         raise
   651         raise
   583 
   652 
       
   653 
   584 def _quote(s):
   654 def _quote(s):
   585     r"""Quote a value in order to make it safe for the revset engine.
   655     r"""Quote a value in order to make it safe for the revset engine.
   586 
   656 
   587     >>> _quote(b'asdf')
   657     >>> _quote(b'asdf')
   588     "'asdf'"
   658     "'asdf'"
   593     >>> _quote(1)
   663     >>> _quote(1)
   594     "'1'"
   664     "'1'"
   595     """
   665     """
   596     return "'%s'" % stringutil.escapestr(pycompat.bytestr(s))
   666     return "'%s'" % stringutil.escapestr(pycompat.bytestr(s))
   597 
   667 
       
   668 
   598 def _formatargtype(c, arg):
   669 def _formatargtype(c, arg):
   599     if c == 'd':
   670     if c == 'd':
   600         return '_rev(%d)' % int(arg)
   671         return '_rev(%d)' % int(arg)
   601     elif c == 's':
   672     elif c == 's':
   602         return _quote(arg)
   673         return _quote(arg)
   603     elif c == 'r':
   674     elif c == 'r':
   604         if not isinstance(arg, bytes):
   675         if not isinstance(arg, bytes):
   605             raise TypeError
   676             raise TypeError
   606         parse(arg) # make sure syntax errors are confined
   677         parse(arg)  # make sure syntax errors are confined
   607         return '(%s)' % arg
   678         return '(%s)' % arg
   608     elif c == 'n':
   679     elif c == 'n':
   609         return _quote(node.hex(arg))
   680         return _quote(node.hex(arg))
   610     elif c == 'b':
   681     elif c == 'b':
   611         try:
   682         try:
   612             return _quote(arg.branch())
   683             return _quote(arg.branch())
   613         except AttributeError:
   684         except AttributeError:
   614             raise TypeError
   685             raise TypeError
   615     raise error.ParseError(_('unexpected revspec format character %s') % c)
   686     raise error.ParseError(_('unexpected revspec format character %s') % c)
       
   687 
   616 
   688 
   617 def _formatlistexp(s, t):
   689 def _formatlistexp(s, t):
   618     l = len(s)
   690     l = len(s)
   619     if l == 0:
   691     if l == 0:
   620         return "_list('')"
   692         return "_list('')"
   633             raise TypeError
   705             raise TypeError
   634 
   706 
   635     m = l // 2
   707     m = l // 2
   636     return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
   708     return '(%s or %s)' % (_formatlistexp(s[:m], t), _formatlistexp(s[m:], t))
   637 
   709 
       
   710 
   638 def _formatintlist(data):
   711 def _formatintlist(data):
   639     try:
   712     try:
   640         l = len(data)
   713         l = len(data)
   641         if l == 0:
   714         if l == 0:
   642             return "_list('')"
   715             return "_list('')"
   644             return _formatargtype('d', data[0])
   717             return _formatargtype('d', data[0])
   645         return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
   718         return "_intlist('%s')" % "\0".join('%d' % int(a) for a in data)
   646     except (TypeError, ValueError):
   719     except (TypeError, ValueError):
   647         raise error.ParseError(_('invalid argument for revspec'))
   720         raise error.ParseError(_('invalid argument for revspec'))
   648 
   721 
       
   722 
   649 def _formatparamexp(args, t):
   723 def _formatparamexp(args, t):
   650     return ', '.join(_formatargtype(t, a) for a in args)
   724     return ', '.join(_formatargtype(t, a) for a in args)
       
   725 
   651 
   726 
   652 _formatlistfuncs = {
   727 _formatlistfuncs = {
   653     'l': _formatlistexp,
   728     'l': _formatlistexp,
   654     'p': _formatparamexp,
   729     'p': _formatparamexp,
   655 }
   730 }
       
   731 
   656 
   732 
   657 def formatspec(expr, *args):
   733 def formatspec(expr, *args):
   658     '''
   734     '''
   659     This is a convenience function for using revsets internally, and
   735     This is a convenience function for using revsets internally, and
   660     escapes arguments appropriately. Aliases are intentionally ignored
   736     escapes arguments appropriately. Aliases are intentionally ignored
   702             ret.append(_formatintlist(list(arg)))
   778             ret.append(_formatintlist(list(arg)))
   703         else:
   779         else:
   704             raise error.ProgrammingError("unknown revspec item type: %r" % t)
   780             raise error.ProgrammingError("unknown revspec item type: %r" % t)
   705     return b''.join(ret)
   781     return b''.join(ret)
   706 
   782 
       
   783 
   707 def spectree(expr, *args):
   784 def spectree(expr, *args):
   708     """similar to formatspec but return a parsed and optimized tree"""
   785     """similar to formatspec but return a parsed and optimized tree"""
   709     parsed = _parseargs(expr, args)
   786     parsed = _parseargs(expr, args)
   710     ret = []
   787     ret = []
   711     inputs = []
   788     inputs = []
   723     tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
   800     tree = parser.buildtree(tree, ('symbol', '$'), *inputs)
   724     tree = foldconcat(tree)
   801     tree = foldconcat(tree)
   725     tree = analyze(tree)
   802     tree = analyze(tree)
   726     tree = optimize(tree)
   803     tree = optimize(tree)
   727     return tree
   804     return tree
       
   805 
   728 
   806 
   729 def _parseargs(expr, args):
   807 def _parseargs(expr, args):
   730     """parse the expression and replace all inexpensive args
   808     """parse the expression and replace all inexpensive args
   731 
   809 
   732     return a list of tuple [(arg-type, arg-value)]
   810     return a list of tuple [(arg-type, arg-value)]
   761             raise error.ParseError(_('missing argument for revspec'))
   839             raise error.ParseError(_('missing argument for revspec'))
   762         f = _formatlistfuncs.get(d)
   840         f = _formatlistfuncs.get(d)
   763         if f:
   841         if f:
   764             # a list of some type, might be expensive, do not replace
   842             # a list of some type, might be expensive, do not replace
   765             pos += 1
   843             pos += 1
   766             islist = (d == 'l')
   844             islist = d == 'l'
   767             try:
   845             try:
   768                 d = expr[pos]
   846                 d = expr[pos]
   769             except IndexError:
   847             except IndexError:
   770                 raise error.ParseError(_('incomplete revspec format character'))
   848                 raise error.ParseError(_('incomplete revspec format character'))
   771             if islist and d == 'd' and arg:
   849             if islist and d == 'd' and arg:
   792         raise error.ParseError(_('too many revspec arguments specified'))
   870         raise error.ParseError(_('too many revspec arguments specified'))
   793     except StopIteration:
   871     except StopIteration:
   794         pass
   872         pass
   795     return ret
   873     return ret
   796 
   874 
       
   875 
   797 def prettyformat(tree):
   876 def prettyformat(tree):
   798     return parser.prettyformat(tree, ('string', 'symbol'))
   877     return parser.prettyformat(tree, ('string', 'symbol'))
       
   878 
   799 
   879 
   800 def depth(tree):
   880 def depth(tree):
   801     if isinstance(tree, tuple):
   881     if isinstance(tree, tuple):
   802         return max(map(depth, tree)) + 1
   882         return max(map(depth, tree)) + 1
   803     else:
   883     else:
   804         return 0
   884         return 0
       
   885 
   805 
   886 
   806 def funcsused(tree):
   887 def funcsused(tree):
   807     if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
   888     if not isinstance(tree, tuple) or tree[0] in ('string', 'symbol'):
   808         return set()
   889         return set()
   809     else:
   890     else:
   812             funcs |= funcsused(s)
   893             funcs |= funcsused(s)
   813         if tree[0] == 'func':
   894         if tree[0] == 'func':
   814             funcs.add(tree[1][1])
   895             funcs.add(tree[1][1])
   815         return funcs
   896         return funcs
   816 
   897 
       
   898 
   817 _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
   899 _hashre = util.re.compile('[0-9a-fA-F]{1,40}$')
       
   900 
   818 
   901 
   819 def _ishashlikesymbol(symbol):
   902 def _ishashlikesymbol(symbol):
   820     """returns true if the symbol looks like a hash"""
   903     """returns true if the symbol looks like a hash"""
   821     return _hashre.match(symbol)
   904     return _hashre.match(symbol)
       
   905 
   822 
   906 
   823 def gethashlikesymbols(tree):
   907 def gethashlikesymbols(tree):
   824     """returns the list of symbols of the tree that look like hashes
   908     """returns the list of symbols of the tree that look like hashes
   825 
   909 
   826     >>> gethashlikesymbols(parse(b'3::abe3ff'))
   910     >>> gethashlikesymbols(parse(b'3::abe3ff'))