Mercurial > hg
comparison mercurial/revsetlang.py @ 34044:b862e6fca7ac
revsetlang: build optimized tree by helper function
This should make optimize() more readable, but it doubles the parsing cost.
(original)
$ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \
'L.optimize(L.analyze(L.parse("::tip")))'
10000 loops, best of 3: 18.1 usec per loop
(this patch)
$ python -m timeit -n10000 -s 'from mercurial import revsetlang as L' \
'L._treecache.clear(); L.optimize(L.analyze(L.parse("::tip")))'
10000 loops, best of 3: 48.4 usec per loop
30usec isn't dominant compared to the revset evaluation, but that is a cost.
That's why a parsed tree is cached, which can benefit in hgweb or chg server.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Wed, 17 Feb 2016 21:38:25 +0900 |
parents | 37b82485097f |
children | f23cbca9b277 |
comparison
equal
deleted
inserted
replaced
34043:90896b61fe26 | 34044:b862e6fca7ac |
---|---|
237 | 237 |
238 def getargsdict(x, funcname, keys): | 238 def getargsdict(x, funcname, keys): |
239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys), | 239 return parser.buildargsdict(getlist(x), funcname, parser.splitargspec(keys), |
240 keyvaluenode='keyvalue', keynode='symbol') | 240 keyvaluenode='keyvalue', keynode='symbol') |
241 | 241 |
242 # cache of {spec: raw parsed tree} built internally | |
243 _treecache = {} | |
244 | |
245 def _cachedtree(spec): | |
246 # thread safe because parse() is reentrant and dict.__setitem__() is atomic | |
247 tree = _treecache.get(spec) | |
248 if tree is None: | |
249 _treecache[spec] = tree = parse(spec) | |
250 return tree | |
251 | |
252 def _build(tmplspec, *repls): | |
253 """Create raw parsed tree from a template revset statement | |
254 | |
255 >>> _build('f(_) and _', ('string', '1'), ('symbol', '2')) | |
256 ('and', ('func', ('symbol', 'f'), ('string', '1')), ('symbol', '2')) | |
257 """ | |
258 template = _cachedtree(tmplspec) | |
259 return parser.buildtree(template, ('symbol', '_'), *repls) | |
260 | |
242 def _isnamedfunc(x, funcname): | 261 def _isnamedfunc(x, funcname): |
243 """Check if given tree matches named function""" | 262 """Check if given tree matches named function""" |
244 return x and x[0] == 'func' and getsymbol(x[1]) == funcname | 263 return x and x[0] == 'func' and getsymbol(x[1]) == funcname |
245 | 264 |
246 def _isposargs(x, n): | 265 def _isposargs(x, n): |
300 if x is None: | 319 if x is None: |
301 return x | 320 return x |
302 | 321 |
303 op = x[0] | 322 op = x[0] |
304 if op == 'minus': | 323 if op == 'minus': |
305 return _analyze(('and', x[1], ('not', x[2]))) | 324 return _analyze(_build('_ and not _', *x[1:])) |
306 elif op == 'only': | 325 elif op == 'only': |
307 t = ('func', ('symbol', 'only'), ('list', x[1], x[2])) | 326 return _analyze(_build('only(_, _)', *x[1:])) |
308 return _analyze(t) | |
309 elif op == 'onlypost': | 327 elif op == 'onlypost': |
310 return _analyze(('func', ('symbol', 'only'), x[1])) | 328 return _analyze(_build('only(_)', x[1])) |
311 elif op == 'dagrangepre': | 329 elif op == 'dagrangepre': |
312 return _analyze(('func', ('symbol', 'ancestors'), x[1])) | 330 return _analyze(_build('ancestors(_)', x[1])) |
313 elif op == 'dagrangepost': | 331 elif op == 'dagrangepost': |
314 return _analyze(('func', ('symbol', 'descendants'), x[1])) | 332 return _analyze(_build('descendants(_)', x[1])) |
315 elif op == 'negate': | 333 elif op == 'negate': |
316 s = getstring(x[1], _("can't negate that")) | 334 s = getstring(x[1], _("can't negate that")) |
317 return _analyze(('string', '-' + s)) | 335 return _analyze(('string', '-' + s)) |
318 elif op in ('string', 'symbol'): | 336 elif op in ('string', 'symbol'): |
319 return x | 337 return x |
365 wa, ta = _optimize(x[1], True) | 383 wa, ta = _optimize(x[1], True) |
366 wb, tb = _optimize(x[2], True) | 384 wb, tb = _optimize(x[2], True) |
367 w = min(wa, wb) | 385 w = min(wa, wb) |
368 | 386 |
369 # (::x and not ::y)/(not ::y and ::x) have a fast path | 387 # (::x and not ::y)/(not ::y and ::x) have a fast path |
370 tm = _matchonly(ta, tb) or _matchonly(tb, ta) | 388 m = _matchonly(ta, tb) or _matchonly(tb, ta) |
371 if tm: | 389 if m: |
372 return w, ('func', ('symbol', 'only'), tm) | 390 return w, _build('only(_, _)', *m[1:]) |
373 | 391 |
374 if tb is not None and tb[0] == 'not': | 392 if tb is not None and tb[0] == 'not': |
375 return wa, ('difference', ta, tb[1]) | 393 return wa, ('difference', ta, tb[1]) |
376 if wa > wb: | 394 if wa > wb: |
377 op = 'andsmally' | 395 op = 'andsmally' |
385 return | 403 return |
386 if len(ss) == 1: | 404 if len(ss) == 1: |
387 w, t = ss[0] | 405 w, t = ss[0] |
388 else: | 406 else: |
389 s = '\0'.join(t[1] for w, t in ss) | 407 s = '\0'.join(t[1] for w, t in ss) |
390 y = ('func', ('symbol', '_list'), ('string', s)) | 408 y = _build('_list(_)', ('string', s)) |
391 w, t = _optimize(y, False) | 409 w, t = _optimize(y, False) |
392 ws.append(w) | 410 ws.append(w) |
393 ts.append(t) | 411 ts.append(t) |
394 del ss[:] | 412 del ss[:] |
395 for y in getlist(x[1]): | 413 for y in getlist(x[1]): |
405 return ws[0], ts[0] # 'or' operation is fully optimized out | 423 return ws[0], ts[0] # 'or' operation is fully optimized out |
406 return max(ws), (op, ('list',) + tuple(ts)) | 424 return max(ws), (op, ('list',) + tuple(ts)) |
407 elif op == 'not': | 425 elif op == 'not': |
408 # Optimize not public() to _notpublic() because we have a fast version | 426 # Optimize not public() to _notpublic() because we have a fast version |
409 if x[1][:3] == ('func', ('symbol', 'public'), None): | 427 if x[1][:3] == ('func', ('symbol', 'public'), None): |
410 newsym = ('func', ('symbol', '_notpublic'), None) | 428 o = _optimize(_build('_notpublic()'), not small) |
411 o = _optimize(newsym, not small) | |
412 return o[0], o[1] | 429 return o[0], o[1] |
413 else: | 430 else: |
414 o = _optimize(x[1], not small) | 431 o = _optimize(x[1], not small) |
415 return o[0], (op, o[1]) | 432 return o[0], (op, o[1]) |
416 elif op == 'rangeall': | 433 elif op == 'rangeall': |