comparison mercurial/filesetlang.py @ 38865:899b4c74209c

fileset: combine union of basic patterns into single matcher This appears to improve query performance in a big repository than I thought. Writing less Python in a hot loop, faster computation we gain. $ hg files --cwd mozilla-central --time 'set:a* + b* + c* + d* + e*' (orig) time: real 0.670 secs (user 0.640+0.000 sys 0.030+0.000) (new) time: real 0.210 secs (user 0.180+0.000 sys 0.020+0.000)
author Yuya Nishihara <yuya@tcha.org>
date Sat, 21 Jul 2018 17:19:12 +0900
parents 73731fa8d1bd
children e79a69af1593
comparison
equal deleted inserted replaced
38864:73731fa8d1bd 38865:899b4c74209c
183 def _optimizeandops(op, ta, tb): 183 def _optimizeandops(op, ta, tb):
184 if tb is not None and tb[0] == 'not': 184 if tb is not None and tb[0] == 'not':
185 return ('minus', ta, tb[1]) 185 return ('minus', ta, tb[1])
186 return (op, ta, tb) 186 return (op, ta, tb)
187 187
188 def _optimizeunion(xs):
189 # collect string patterns so they can be compiled into a single regexp
190 ws, ts, ss = [], [], []
191 for x in xs:
192 w, t = _optimize(x)
193 if t is not None and t[0] in {'string', 'symbol', 'kindpat'}:
194 ss.append(t)
195 continue
196 ws.append(w)
197 ts.append(t)
198 if ss:
199 ws.append(WEIGHT_CHECK_FILENAME)
200 ts.append(('patterns',) + tuple(ss))
201 return ws, ts
202
188 def _optimize(x): 203 def _optimize(x):
189 if x is None: 204 if x is None:
190 return 0, x 205 return 0, x
191 206
192 op = x[0] 207 op = x[0]
204 if wa <= wb: 219 if wa <= wb:
205 return wa, _optimizeandops(op, ta, tb) 220 return wa, _optimizeandops(op, ta, tb)
206 else: 221 else:
207 return wb, _optimizeandops(op, tb, ta) 222 return wb, _optimizeandops(op, tb, ta)
208 if op == 'or': 223 if op == 'or':
209 ws, ts = zip(*(_optimize(y) for y in x[1:])) 224 ws, ts = _optimizeunion(x[1:])
225 if len(ts) == 1:
226 return ws[0], ts[0] # 'or' operation is fully optimized out
210 ts = tuple(it[1] for it in sorted(enumerate(ts), 227 ts = tuple(it[1] for it in sorted(enumerate(ts),
211 key=lambda it: ws[it[0]])) 228 key=lambda it: ws[it[0]]))
212 return max(ws), (op,) + ts 229 return max(ws), (op,) + ts
213 if op == 'list': 230 if op == 'list':
214 ws, ts = zip(*(_optimize(y) for y in x[1:])) 231 ws, ts = zip(*(_optimize(y) for y in x[1:]))