mercurial/revset.py
changeset 23842 91dbb98b3513
parent 23836 3fb61fcbc4e4
child 23843 c4d0c3d05721
--- a/mercurial/revset.py	Fri Jan 09 18:38:02 2015 +0100
+++ b/mercurial/revset.py	Sat Jan 10 23:18:11 2015 +0900
@@ -129,15 +129,39 @@
 
 keywords = set(['and', 'or', 'not'])
 
-def tokenize(program, lookup=None):
+# default set of valid initial symbol characters (tokenize() fallback)
+_syminitletters = set(c for c in [chr(i) for i in xrange(256)]
+                      if c.isalnum() or c in '._@' or ord(c) > 127)
+
+# default set of valid non-initial symbol characters (tokenize() fallback)
+_symletters = set(c for c in  [chr(i) for i in xrange(256)]
+                  if c.isalnum() or c in '-._/@' or ord(c) > 127)
+
+def tokenize(program, lookup=None, syminitletters=None, symletters=None):
     '''
     Parse a revset statement into a stream of tokens
 
+    ``syminitletters`` is the set of valid characters for the initial
+    letter of symbols.
+
+    By default, character ``c`` is recognized as valid for initial
+    letter of symbols, if ``c.isalnum() or c in '._@' or ord(c) > 127``.
+
+    ``symletters`` is the set of valid characters for non-initial
+    letters of symbols.
+
+    By default, character ``c`` is recognized as valid for non-initial
+    letters of symbols, if ``c.isalnum() or c in '-._/@' or ord(c) > 127``.
+
     Check that @ is a valid unquoted token character (issue3686):
     >>> list(tokenize("@::"))
     [('symbol', '@', 0), ('::', None, 1), ('end', None, 3)]
 
     '''
+    if syminitletters is None:
+        syminitletters = _syminitletters
+    if symletters is None:
+        symletters = _symletters
 
     pos, l = 0, len(program)
     while pos < l:
@@ -177,12 +201,12 @@
             else:
                 raise error.ParseError(_("unterminated string"), s)
         # gather up a symbol/keyword
-        elif c.isalnum() or c in '._@' or ord(c) > 127:
+        elif c in syminitletters:
             s = pos
             pos += 1
             while pos < l: # find end of symbol
                 d = program[pos]
-                if not (d.isalnum() or d in "-._/@" or ord(d) > 127):
+                if d not in symletters:
                     break
                 if d == '.' and program[pos - 1] == '.': # special case for ..
                     pos -= 1