mercurial/revset.py
changeset 23842 91dbb98b3513
parent 23836 3fb61fcbc4e4
child 23843 c4d0c3d05721
equal deleted inserted replaced
23841:9d25bb84cf6c 23842:91dbb98b3513
   127     "end": (0, None, None),
   127     "end": (0, None, None),
   128 }
   128 }
   129 
   129 
   130 keywords = set(['and', 'or', 'not'])
   130 keywords = set(['and', 'or', 'not'])
   131 
   131 
   132 def tokenize(program, lookup=None):
   132 # default set of valid characters for the initial letter of symbols
       
   133 _syminitletters = set(c for c in [chr(i) for i in xrange(256)]
       
   134                       if c.isalnum() or c in '._@' or ord(c) > 127)
       
   135 
       
   136 # default set of valid characters for non-initial letters of symbols
       
   137 _symletters = set(c for c in  [chr(i) for i in xrange(256)]
       
   138                   if c.isalnum() or c in '-._/@' or ord(c) > 127)
       
   139 
       
   140 def tokenize(program, lookup=None, syminitletters=None, symletters=None):
   133     '''
   141     '''
   134     Parse a revset statement into a stream of tokens
   142     Parse a revset statement into a stream of tokens
       
   143 
       
   144     ``syminitletters`` is the set of valid characters for the initial
       
   145     letter of symbols.
       
   146 
       
   147     By default, character ``c`` is recognized as a valid initial
       
   148     letter of a symbol if ``c.isalnum() or c in '._@' or ord(c) > 127``.
       
   149 
       
   150     ``symletters`` is the set of valid characters for non-initial
       
   151     letters of symbols.
       
   152 
       
   153     By default, character ``c`` is recognized as a valid non-initial
       
   154     letter of a symbol if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
   135 
   155 
   136     Check that @ is a valid unquoted token character (issue3686):
   156     Check that @ is a valid unquoted token character (issue3686):
   137     >>> list(tokenize("@::"))
   157     >>> list(tokenize("@::"))
   138     [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
   158     [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
   139 
   159 
   140     '''
   160     '''
       
   161     if syminitletters is None:
       
   162         syminitletters = _syminitletters
       
   163     if symletters is None:
       
   164         symletters = _symletters
   141 
   165 
   142     pos, l = 0, len(program)
   166     pos, l = 0, len(program)
   143     while pos < l:
   167     while pos < l:
   144         c = program[pos]
   168         c = program[pos]
   145         if c.isspace(): # skip inter-token whitespace
   169         if c.isspace(): # skip inter-token whitespace
   175                     break
   199                     break
   176                 pos += 1
   200                 pos += 1
   177             else:
   201             else:
   178                 raise error.ParseError(_("unterminated string"), s)
   202                 raise error.ParseError(_("unterminated string"), s)
   179         # gather up a symbol/keyword
   203         # gather up a symbol/keyword
   180         elif c.isalnum() or c in '._@' or ord(c) > 127:
   204         elif c in syminitletters:
   181             s = pos
   205             s = pos
   182             pos += 1
   206             pos += 1
   183             while pos < l: # find end of symbol
   207             while pos < l: # find end of symbol
   184                 d = program[pos]
   208                 d = program[pos]
   185                 if not (d.isalnum() or d in "-._/@" or ord(d) > 127):
   209                 if d not in symletters:
   186                     break
   210                     break
   187                 if d == '.' and program[pos - 1] == '.': # special case for ..
   211                 if d == '.' and program[pos - 1] == '.': # special case for ..
   188                     pos -= 1
   212                     pos -= 1
   189                     break
   213                     break
   190                 pos += 1
   214                 pos += 1