# HG changeset patch # User Yuya Nishihara # Date 1431843098 -32400 # Node ID 7fbef7932af9bc2c56dfaf5a61f19ad51b424fab # Parent 5dde117269b68851a9131ed06cfb6b57a048c8db revset: optimize 'or' operation of trivial revisions to a list As seen in issue4565 and issue4624, GUI wrappers and automated scripts are likely to generate a long query that just has numeric revisions joined by 'or'. One reason why is that they allows users to choose arbitrary revisions from a list. Because this use case isn't handled well by smartset, let's optimize it to a plain old list. Benchmarks: 1) reduce nesting of chained 'or' operations 2) optimize to a list (this patch) revset #0: 0 + 1 + 2 + ... + 1000 1) wall 0.483341 comb 0.480000 user 0.480000 sys 0.000000 (best of 20) 2) wall 0.025393 comb 0.020000 user 0.020000 sys 0.000000 (best of 107) revset #1: sort(0 + 1 + 2 + ... + 1000) 1) wall 0.035240 comb 0.040000 user 0.040000 sys 0.000000 (best of 100) 2) wall 0.026432 comb 0.030000 user 0.030000 sys 0.000000 (best of 102) revset #2: first(0 + 1 + 2 + ... + 1000) 1) wall 0.028949 comb 0.030000 user 0.030000 sys 0.000000 (best of 100) 2) wall 0.025503 comb 0.030000 user 0.030000 sys 0.000000 (best of 106) diff -r 5dde117269b6 -r 7fbef7932af9 mercurial/revset.py --- a/mercurial/revset.py Fri May 29 21:31:00 2015 +0900 +++ b/mercurial/revset.py Sun May 17 15:11:38 2015 +0900 @@ -2169,11 +2169,36 @@ return w, (op, tb, ta) return w, (op, ta, tb) elif op == 'or': - ws, ts = zip(*[optimize(y, False) for y in x[1:]]) + # fast path for machine-generated expression, that is likely to have + # lots of trivial revisions: 'a + b + c()' to '_list(a b) + c()' + ws, ts, ss = [], [], [] + def flushss(): + if not ss: + return + if len(ss) == 1: + w, t = ss[0] + else: + s = '\0'.join(t[1] for w, t in ss) + y = ('func', ('symbol', '_list'), ('string', s)) + w, t = optimize(y, False) + ws.append(w) + ts.append(t) + del ss[:] + for y in x[1:]: + w, t = optimize(y, False) + if t[0] == 'string' or t[0] == 'symbol': + ss.append((w, t)) + continue + flushss() + ws.append(w) + ts.append(t) + flushss() + if len(ts) == 1: + return ws[0], ts[0] # 'or' operation is fully optimized out # we can't reorder trees by weight because it would change the order. # ("sort(a + b)" == "sort(b + a)", but "a + b" != "b + a") # ts = tuple(t for w, t in sorted(zip(ws, ts), key=lambda wt: wt[0])) - return max(ws), (op,) + ts + return max(ws), (op,) + tuple(ts) elif op == 'not': # Optimize not public() to _notpublic() because we have a fast version if x[1] == ('func', ('symbol', 'public'), None): diff -r 5dde117269b6 -r 7fbef7932af9 tests/test-revset.t --- a/tests/test-revset.t Fri May 29 21:31:00 2015 +0900 +++ b/tests/test-revset.t Sun May 17 15:11:38 2015 +0900 @@ -132,11 +132,7 @@ ('symbol', '1') ('symbol', '2')) * set: - , - , - >> + 0 1 2 @@ -272,9 +268,7 @@ * set: , - , - >> + > 1 2 3 @@ -909,6 +903,114 @@ 4 5 +test optimization of trivial `or` operation + + $ try --optimize '0|(1)|"2"|-2|tip|null' + (or + ('symbol', '0') + (group + ('symbol', '1')) + ('string', '2') + (negate + ('symbol', '2')) + ('symbol', 'tip') + ('symbol', 'null')) + * optimized: + (func + ('symbol', '_list') + ('string', '0\x001\x002\x00-2\x00tip\x00null')) + * set: + + 0 + 1 + 2 + 8 + 9 + -1 + + $ try --optimize '0|1|2:3' + (or + ('symbol', '0') + ('symbol', '1') + (range + ('symbol', '2') + ('symbol', '3'))) + * optimized: + (or + (func + ('symbol', '_list') + ('string', '0\x001')) + (range + ('symbol', '2') + ('symbol', '3'))) + * set: + , + > + 0 + 1 + 2 + 3 + + $ try --optimize '0:1|2|3:4|5|6' + (or + (range + ('symbol', '0') + ('symbol', '1')) + ('symbol', '2') + (range + ('symbol', '3') + ('symbol', '4')) + ('symbol', '5') + ('symbol', '6')) + * optimized: + (or + (range + ('symbol', '0') + ('symbol', '1')) + ('symbol', '2') + (range + ('symbol', '3') + ('symbol', '4')) + (func + ('symbol', '_list') + ('string', '5\x006'))) + * set: + , + >, + , + >> + 0 + 1 + 2 + 3 + 4 + 5 + 6 + +test that `_list` should be narrowed by provided `subset` + + $ log '0:2 and (null|1|2|3)' + 1 + 2 + +test that `_list` should remove duplicates + + $ log '0|1|2|1|2|-1|tip' + 0 + 1 + 2 + 9 + +test unknown revision in `_list` + + $ log '0|unknown' + abort: unknown revision 'unknown'! + [255] + test that chained `or` operations make balanced addsets $ try '0:1|1:2|2:3|3:4|4:5' @@ -1367,9 +1469,7 @@ * set: , - , - >> + > 3 1 2