comparison mercurial/revsetlang.py @ 41221:73203cdfe3fe

revset: detect integer list on parsing Right now, using "%ld" with `repo.revs("…%ld…", somerevs)` is very inefficient: all items in `somerevs` are serialized to ASCII and then reparsed as integers. If `somerevs` contains just a handful of entries this is fine; however, when you get to thousands or hundreds of thousands of revisions this becomes very slow. To avoid this serialization we first need to detect this situation. The code involved in the whole process is quite complex, so we start simple and focus on some "simple" but widespread cases. So far we only detect the situation and don't do anything special about it. The singled-out integer lists are still serialized in `formatspec` in the same way as before.
author Boris Feld <boris.feld@octobus.net>
date Fri, 04 Jan 2019 05:26:13 +0100
parents 8d26026b3335
children 8aca89a694d4
comparison
equal deleted inserted replaced
41220:8d26026b3335 41221:73203cdfe3fe
13 from . import ( 13 from . import (
14 error, 14 error,
15 node, 15 node,
16 parser, 16 parser,
17 pycompat, 17 pycompat,
18 smartset,
18 util, 19 util,
19 ) 20 )
20 from .utils import ( 21 from .utils import (
21 stringutil, 22 stringutil,
22 ) 23 )
680 parsed = _parseargs(expr, args) 681 parsed = _parseargs(expr, args)
681 ret = [] 682 ret = []
682 for t, arg in parsed: 683 for t, arg in parsed:
683 if t is None: 684 if t is None:
684 ret.append(arg) 685 ret.append(arg)
686 elif t == 'baseset':
687 if isinstance(arg, set):
688 arg = sorted(arg)
689 ret.append(_formatintlist(list(arg)))
685 else: 690 else:
686 raise error.ProgrammingError("unknown revspec item type: %r" % t) 691 raise error.ProgrammingError("unknown revspec item type: %r" % t)
687 return b''.join(ret) 692 return b''.join(ret)
688 693
689 def _parseargs(expr, args): 694 def _parseargs(expr, args):
690 """parse the expression and replace all inexpensive args 695 """parse the expression and replace all inexpensive args
691 696
692 return a list of tuple [(arg-type, arg-value)] 697 return a list of tuple [(arg-type, arg-value)]
693 698
694 Arg-type can be: 699 Arg-type can be:
695 * None: a string ready to be concatenated into a final spec 700 * None: a string ready to be concatenated into a final spec
701 * 'baseset': an iterable of revisions
696 """ 702 """
697 expr = pycompat.bytestr(expr) 703 expr = pycompat.bytestr(expr)
698 argiter = iter(args) 704 argiter = iter(args)
699 ret = [] 705 ret = []
700 pos = 0 706 pos = 0
720 raise error.ParseError(_('missing argument for revspec')) 726 raise error.ParseError(_('missing argument for revspec'))
721 f = _formatlistfuncs.get(d) 727 f = _formatlistfuncs.get(d)
722 if f: 728 if f:
723 # a list of some type, might be expensive, do not replace 729 # a list of some type, might be expensive, do not replace
724 pos += 1 730 pos += 1
731 islist = (d == 'l')
725 try: 732 try:
726 d = expr[pos] 733 d = expr[pos]
727 except IndexError: 734 except IndexError:
728 raise error.ParseError(_('incomplete revspec format character')) 735 raise error.ParseError(_('incomplete revspec format character'))
736 if islist and d == 'd' and arg:
737 # special case, we might be able to speedup the list of int case
738 #
739 # We have been very conservative here for the first version.
740 # Other types (eg: generator) are probably fine, but we did not
741 # wanted to take any risk>
742 safeinputtype = (list, tuple, set, smartset.abstractsmartset)
743 if isinstance(arg, safeinputtype):
744 # we don't create a baseset yet, because it come with an
745 # extra cost. If we are going to serialize it we better
746 # skip it.
747 ret.append(('baseset', arg))
748 pos += 1
749 continue
729 try: 750 try:
730 ret.append((None, f(list(arg), d))) 751 ret.append((None, f(list(arg), d)))
731 except (TypeError, ValueError): 752 except (TypeError, ValueError):
732 raise error.ParseError(_('invalid argument for revspec')) 753 raise error.ParseError(_('invalid argument for revspec'))
733 else: 754 else: