Mercurial > hg
changeset 25343:7fbef7932af9
revset: optimize 'or' operation of trivial revisions to a list
As seen in issue4565 and issue4624, GUI wrappers and automated scripts are
likely to generate a long query that just has numeric revisions joined by 'or'.
One reason why is that they allows users to choose arbitrary revisions from
a list. Because this use case isn't handled well by smartset, let's optimize
it to a plain old list.
Benchmarks:
1) reduce nesting of chained 'or' operations
2) optimize to a list (this patch)
revset #0: 0 + 1 + 2 + ... + 1000
1) wall 0.483341 comb 0.480000 user 0.480000 sys 0.000000 (best of 20)
2) wall 0.025393 comb 0.020000 user 0.020000 sys 0.000000 (best of 107)
revset #1: sort(0 + 1 + 2 + ... + 1000)
1) wall 0.035240 comb 0.040000 user 0.040000 sys 0.000000 (best of 100)
2) wall 0.026432 comb 0.030000 user 0.030000 sys 0.000000 (best of 102)
revset #2: first(0 + 1 + 2 + ... + 1000)
1) wall 0.028949 comb 0.030000 user 0.030000 sys 0.000000 (best of 100)
2) wall 0.025503 comb 0.030000 user 0.030000 sys 0.000000 (best of 106)
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 17 May 2015 15:11:38 +0900 |
parents | 5dde117269b6 |
children | ceaf04bb14ff |
files | mercurial/revset.py tests/test-revset.t |
diffstat | 2 files changed, 138 insertions(+), 13 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/revset.py Fri May 29 21:31:00 2015 +0900 +++ b/mercurial/revset.py Sun May 17 15:11:38 2015 +0900 @@ -2169,11 +2169,36 @@ return w, (op, tb, ta) return w, (op, ta, tb) elif op == 'or': - ws, ts = zip(*[optimize(y, False) for y in x[1:]]) + # fast path for machine-generated expression, that is likely to have + # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' + ws, ts, ss = [], [], [] + def flushss(): + if not ss: + return + if len(ss) == 1: + w, t = ss[0] + else: + s = '\0'.join(t[1] for w, t in ss) + y = ('func', ('symbol', '_list'), ('string', s)) + w, t = optimize(y, False) + ws.append(w) + ts.append(t) + del ss[:] + for y in x[1:]: + w, t = optimize(y, False) + if t[0] == 'string' or t[0] == 'symbol': + ss.append((w, t)) + continue + flushss() + ws.append(w) + ts.append(t) + flushss() + if len(ts) == 1: + return ws[0], ts[0] # 'or' operation is fully optimized out # we can't reorder trees by weight because it would change the order. # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a") # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0])) - return max(ws), (op,) + ts + return max(ws), (op,) + tuple(ts) elif op == 'not': # Optimize not public() to _notpublic() because we have a fast version if x[1] == ('func', ('symbol', 'public'), None):
--- a/tests/test-revset.t Fri May 29 21:31:00 2015 +0900 +++ b/tests/test-revset.t Sun May 17 15:11:38 2015 +0900 @@ -132,11 +132,7 @@ ('symbol', '1') ('symbol', '2')) * set: - <addset - <baseset [0]>, - <addset - <baseset [1]>, - <baseset [2]>>> + <baseset [0, 1, 2]> 0 1 2 @@ -272,9 +268,7 @@ * set: <addset <baseset [1]>, - <addset - <baseset [2]>, - <baseset [3]>>> + <baseset [2, 3]>> 1 2 3 @@ -909,6 +903,114 @@ 4 5 +test optimization of trivial `or` operation + + $ try --optimize '0|(1)|"2"|-2|tip|null' + (or + ('symbol', '0') + (group + ('symbol', '1')) + ('string', '2') + (negate + ('symbol', '2')) + ('symbol', 'tip') + ('symbol', 'null')) + * optimized: + (func + ('symbol', '_list') + ('string', '0\x001\x002\x00-2\x00tip\x00null')) + * set: + <baseset [0, 1, 2, 8, 9, -1]> + 0 + 1 + 2 + 8 + 9 + -1 + + $ try --optimize '0|1|2:3' + (or + ('symbol', '0') + ('symbol', '1') + (range + ('symbol', '2') + ('symbol', '3'))) + * optimized: + (or + (func + ('symbol', '_list') + ('string', '0\x001')) + (range + ('symbol', '2') + ('symbol', '3'))) + * set: + <addset + <baseset [0, 1]>, + <spanset+ 2:3>> + 0 + 1 + 2 + 3 + + $ try --optimize '0:1|2|3:4|5|6' + (or + (range + ('symbol', '0') + ('symbol', '1')) + ('symbol', '2') + (range + ('symbol', '3') + ('symbol', '4')) + ('symbol', '5') + ('symbol', '6')) + * optimized: + (or + (range + ('symbol', '0') + ('symbol', '1')) + ('symbol', '2') + (range + ('symbol', '3') + ('symbol', '4')) + (func + ('symbol', '_list') + ('string', '5\x006'))) + * set: + <addset + <addset + <spanset+ 0:1>, + <baseset [2]>>, + <addset + <spanset+ 3:4>, + <baseset [5, 6]>>> + 0 + 1 + 2 + 3 + 4 + 5 + 6 + +test that `_list` should be narrowed by provided `subset` + + $ log '0:2 and (null|1|2|3)' + 1 + 2 + +test that `_list` should remove duplicates + + $ log '0|1|2|1|2|-1|tip' + 0 + 1 + 2 + 9 + +test unknown revision in `_list` + + $ log '0|unknown' + abort: unknown revision 'unknown'! + [255] + test that chained `or` operations make balanced addsets $ try '0:1|1:2|2:3|3:4|4:5' @@ -1367,9 +1469,7 @@ * set: <addset <baseset [3]>, - <addset - <baseset [1]>, - <baseset [2]>>> + <baseset [1, 2]>> 3 1 2