comparison mercurial/revsetlang.py @ 34044:b862e6fca7ac

revsetlang: build optimized tree by helper function This should make optimize() more readable, but it doubles the parsing cost. (original) $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \ 'L.optimize(L.analyze(L.parse("::tip")))' 10000 loops, best of 3: 18.1 usec per loop (this patch) $ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \ 'L._treecache.clear(); L.optimize(L.analyze(L.parse("::tip")))' 10000 loops, best of 3: 48.4 usec per loop 30usec isn't dominant compared to the revset evaluation, but that is a cost. That's why a parsed tree is cached, which can benefit in hgweb or chg server.
author Yuya Nishihara <yuya@tcha.org>
date Wed, 17 Feb 2016 21:38:25 +0900
parents 37b82485097f
children f23cbca9b277
comparison
equal deleted inserted replaced
34043:90896b61fe26 34044:b862e6fca7ac
237 237
238 def getargsdict(x, funcname, keys): 238 def getargsdict(x, funcname, keys):
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys), 239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys),
240 keyvaluenode='keyvalue', keynode='symbol') 240 keyvaluenode='keyvalue', keynode='symbol')
241 241
242 # cache of {spec: raw parsed tree} built internally
243 _treecache = {}
244
245 def _cachedtree(spec):
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic
247 tree = _treecache.get(spec)
248 if tree is None:
249 _treecache[spec] = tree = parse(spec)
250 return tree
251
252 def _build(tmplspec, *repls):
253 """Create raw parsed tree from a template revset statement
254
255 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2'))
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2'))
257 """
258 template = _cachedtree(tmplspec)
259 return parser.buildtree(template, ('symbol', '_'), *repls)
260
242 def _isnamedfunc(x, funcname): 261 def _isnamedfunc(x, funcname):
243 """Check if given tree matches named function""" 262 """Check if given tree matches named function"""
244 return x and x[0] == 'func' and getsymbol(x[1]) == funcname 263 return x and x[0] == 'func' and getsymbol(x[1]) == funcname
245 264
246 def _isposargs(x, n): 265 def _isposargs(x, n):
300 if x is None: 319 if x is None:
301 return x 320 return x
302 321
303 op = x[0] 322 op = x[0]
304 if op == 'minus': 323 if op == 'minus':
305 return _analyze(('and', x[1], ('not', x[2]))) 324 return _analyze(_build('_ and not _', *x[1:]))
306 elif op == 'only': 325 elif op == 'only':
307 t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) 326 return _analyze(_build('only(_, _)', *x[1:]))
308 return _analyze(t)
309 elif op == 'onlypost': 327 elif op == 'onlypost':
310 return _analyze(('func', ('symbol', 'only'), x[1])) 328 return _analyze(_build('only(_)', x[1]))
311 elif op == 'dagrangepre': 329 elif op == 'dagrangepre':
312 return _analyze(('func', ('symbol', 'ancestors'), x[1])) 330 return _analyze(_build('ancestors(_)', x[1]))
313 elif op == 'dagrangepost': 331 elif op == 'dagrangepost':
314 return _analyze(('func', ('symbol', 'descendants'), x[1])) 332 return _analyze(_build('descendants(_)', x[1]))
315 elif op == 'negate': 333 elif op == 'negate':
316 s = getstring(x[1], _("can't negate that")) 334 s = getstring(x[1], _("can't negate that"))
317 return _analyze(('string', '-' + s)) 335 return _analyze(('string', '-' + s))
318 elif op in ('string', 'symbol'): 336 elif op in ('string', 'symbol'):
319 return x 337 return x
365 wa, ta = _optimize(x[1], True) 383 wa, ta = _optimize(x[1], True)
366 wb, tb = _optimize(x[2], True) 384 wb, tb = _optimize(x[2], True)
367 w = min(wa, wb) 385 w = min(wa, wb)
368 386
369 # (::x and not ::y)/(not ::y and ::x) have a fast path 387 # (::x and not ::y)/(not ::y and ::x) have a fast path
370 tm = _matchonly(ta, tb) or _matchonly(tb, ta) 388 m = _matchonly(ta, tb) or _matchonly(tb, ta)
371 if tm: 389 if m:
372 return w, ('func', ('symbol', 'only'), tm) 390 return w, _build('only(_, _)', *m[1:])
373 391
374 if tb is not None and tb[0] == 'not': 392 if tb is not None and tb[0] == 'not':
375 return wa, ('difference', ta, tb[1]) 393 return wa, ('difference', ta, tb[1])
376 if wa > wb: 394 if wa > wb:
377 op = 'andsmally' 395 op = 'andsmally'
385 return 403 return
386 if len(ss) == 1: 404 if len(ss) == 1:
387 w, t = ss[0] 405 w, t = ss[0]
388 else: 406 else:
389 s = '\0'.join(t[1] for w, t in ss) 407 s = '\0'.join(t[1] for w, t in ss)
390 y = ('func', ('symbol', '_list'), ('string', s)) 408 y = _build('_list(_)', ('string', s))
391 w, t = _optimize(y, False) 409 w, t = _optimize(y, False)
392 ws.append(w) 410 ws.append(w)
393 ts.append(t) 411 ts.append(t)
394 del ss[:] 412 del ss[:]
395 for y in getlist(x[1]): 413 for y in getlist(x[1]):
405 return ws[0], ts[0] # 'or' operation is fully optimized out 423 return ws[0], ts[0] # 'or' operation is fully optimized out
406 return max(ws), (op, ('list',) + tuple(ts)) 424 return max(ws), (op, ('list',) + tuple(ts))
407 elif op == 'not': 425 elif op == 'not':
408 # Optimize not public() to _notpublic() because we have a fast version 426 # Optimize not public() to _notpublic() because we have a fast version
409 if x[1][:3] == ('func', ('symbol', 'public'), None): 427 if x[1][:3] == ('func', ('symbol', 'public'), None):
410 newsym = ('func', ('symbol', '_notpublic'), None) 428 o = _optimize(_build('_notpublic()'), not small)
411 o = _optimize(newsym, not small)
412 return o[0], o[1] 429 return o[0], o[1]
413 else: 430 else:
414 o = _optimize(x[1], not small) 431 o = _optimize(x[1], not small)
415 return o[0], (op, o[1]) 432 return o[0], (op, o[1])
416 elif op == 'rangeall': 433 elif op == 'rangeall':