Mercurial > hg
comparison mercurial/filesetlang.py @ 38865:899b4c74209c
fileset: combine union of basic patterns into single matcher
This appears to improve query performance in a big repository than I thought.
Writing less Python in a hot loop, faster computation we gain.
$ hg files --cwd mozilla-central --time 'set:a* + b* + c* + d* + e*'
(orig) time: real 0.670 secs (user 0.640+0.000 sys 0.030+0.000)
(new) time: real 0.210 secs (user 0.180+0.000 sys 0.020+0.000)
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sat, 21 Jul 2018 17:19:12 +0900 |
parents | 73731fa8d1bd |
children | e79a69af1593 |
comparison
equal
deleted
inserted
replaced
38864:73731fa8d1bd | 38865:899b4c74209c |
---|---|
183 def _optimizeandops(op, ta, tb): | 183 def _optimizeandops(op, ta, tb): |
184 if tb is not None and tb[0] == 'not': | 184 if tb is not None and tb[0] == 'not': |
185 return ('minus', ta, tb[1]) | 185 return ('minus', ta, tb[1]) |
186 return (op, ta, tb) | 186 return (op, ta, tb) |
187 | 187 |
188 def _optimizeunion(xs): | |
189 # collect string patterns so they can be compiled into a single regexp | |
190 ws, ts, ss = [], [], [] | |
191 for x in xs: | |
192 w, t = _optimize(x) | |
193 if t is not None and t[0] in {'string', 'symbol', 'kindpat'}: | |
194 ss.append(t) | |
195 continue | |
196 ws.append(w) | |
197 ts.append(t) | |
198 if ss: | |
199 ws.append(WEIGHT_CHECK_FILENAME) | |
200 ts.append(('patterns',) + tuple(ss)) | |
201 return ws, ts | |
202 | |
188 def _optimize(x): | 203 def _optimize(x): |
189 if x is None: | 204 if x is None: |
190 return 0, x | 205 return 0, x |
191 | 206 |
192 op = x[0] | 207 op = x[0] |
204 if wa <= wb: | 219 if wa <= wb: |
205 return wa, _optimizeandops(op, ta, tb) | 220 return wa, _optimizeandops(op, ta, tb) |
206 else: | 221 else: |
207 return wb, _optimizeandops(op, tb, ta) | 222 return wb, _optimizeandops(op, tb, ta) |
208 if op == 'or': | 223 if op == 'or': |
209 ws, ts = zip(*(_optimize(y) for y in x[1:])) | 224 ws, ts = _optimizeunion(x[1:]) |
225 if len(ts) == 1: | |
226 return ws[0], ts[0] # 'or' operation is fully optimized out | |
210 ts = tuple(it[1] for it in sorted(enumerate(ts), | 227 ts = tuple(it[1] for it in sorted(enumerate(ts), |
211 key=lambda it: ws[it[0]])) | 228 key=lambda it: ws[it[0]])) |
212 return max(ws), (op,) + ts | 229 return max(ws), (op,) + ts |
213 if op == 'list': | 230 if op == 'list': |
214 ws, ts = zip(*(_optimize(y) for y in x[1:])) | 231 ws, ts = zip(*(_optimize(y) for y in x[1:])) |