Mercurial > hg-stable
comparison mercurial/match.py @ 39477:35ecaa999a12
match: improve includematcher.visitchildrenset to be much faster and cached
This improves the speed of visitchildrenset considerably, especially when there
are complicated matchers involved that may have many entries in _dirs or
_parents.
Unfortunately the benchmark isn't easily upstreamed due to its reliance on
https://github.com/vstinner/perf (primarily due to the conflict when importing
it if I were to contribute the benchmark as contrib/matcherbenchmarks.py)
instead of asv or some other perf measurement system.
To describe the benchmark briefly: I generated an includematcher of either 5 or
3500 "rootfilesin:prefix1/prefix2/prefix3/<randomsubdirs, 1-8 levels deep>"
items in the 'setup' function, and then called
`im.visitchildrenset('prefix1/prefix2')` in the 'stmt' function in perf.timeit.
For the set of 5:
- before: 15.3 us +- 2.9 us
- after: 1.59 us +- 0.02 us
For the set of 3500:
- before: 3.90 ms +- 0.10 ms
- after: 3.15 us +- 0.09 us (note the m->u change)
Differential Revision: https://phab.mercurial-scm.org/D4351
author | Kyle Lippincott <spectral@google.com> |
---|---|
date | Fri, 17 Aug 2018 19:18:53 -0700 |
parents | c9a3f7f5c023 |
children | 19ed212de2d1 |
comparison
equal
deleted
inserted
replaced
39476:bc3b99d5627e | 39477:35ecaa999a12 |
---|---|
494 | 494 |
495 @encoding.strmethod | 495 @encoding.strmethod |
496 def __repr__(self): | 496 def __repr__(self): |
497 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)) | 497 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)) |
498 | 498 |
499 # This is basically a reimplementation of util.dirs that stores the children | |
500 # instead of just a count of them, plus a small optional optimization to avoid | |
501 # some directories we don't need. | |
502 class _dirchildren(object): | |
503 def __init__(self, paths, onlyinclude=None): | |
504 self._dirs = {} | |
505 self._onlyinclude = onlyinclude or [] | |
506 addpath = self.addpath | |
507 for f in paths: | |
508 addpath(f) | |
509 | |
510 def addpath(self, path): | |
511 if path == '.': | |
512 return | |
513 dirs = self._dirs | |
514 findsplitdirs = _dirchildren._findsplitdirs | |
515 for d, b in findsplitdirs(path): | |
516 if d not in self._onlyinclude: | |
517 continue | |
518 dirs.setdefault(d, set()).add(b) | |
519 | |
520 @staticmethod | |
521 def _findsplitdirs(path): | |
522 # yields (dirname, basename) tuples, walking back to the root. This is | |
523 # very similar to util.finddirs, except: | |
524 # - produces a (dirname, basename) tuple, not just 'dirname' | |
525 # - includes root dir | |
526 # Unlike manifest._splittopdir, this does not suffix `dirname` with a | |
527 # slash, and produces '.' for the root instead of ''. | |
528 oldpos = len(path) | |
529 pos = path.rfind('/') | |
530 while pos != -1: | |
531 yield path[:pos], path[pos + 1:oldpos] | |
532 oldpos = pos | |
533 pos = path.rfind('/', 0, pos) | |
534 yield '.', path[:oldpos] | |
535 | |
536 def get(self, path): | |
537 return self._dirs.get(path, set()) | |
538 | |
499 class includematcher(basematcher): | 539 class includematcher(basematcher): |
500 | 540 |
501 def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None): | 541 def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None): |
502 super(includematcher, self).__init__(root, cwd, badfn) | 542 super(includematcher, self).__init__(root, cwd, badfn) |
503 | 543 |
521 dir in self._dirs or | 561 dir in self._dirs or |
522 dir in self._parents or | 562 dir in self._parents or |
523 any(parentdir in self._roots | 563 any(parentdir in self._roots |
524 for parentdir in util.finddirs(dir))) | 564 for parentdir in util.finddirs(dir))) |
525 | 565 |
566 @propertycache | |
567 def _allparentschildren(self): | |
568 # It may seem odd that we add dirs, roots, and parents, and then | |
569 # restrict to only parents. This is to catch the case of: | |
570 # dirs = ['foo/bar'] | |
571 # parents = ['foo'] | |
572 # if we asked for the children of 'foo', but had only added | |
573 # self._parents, we wouldn't be able to respond ['bar']. | |
574 return _dirchildren( | |
575 itertools.chain(self._dirs, self._roots, self._parents), | |
576 onlyinclude=self._parents) | |
577 | |
526 def visitchildrenset(self, dir): | 578 def visitchildrenset(self, dir): |
527 if self._prefix and dir in self._roots: | 579 if self._prefix and dir in self._roots: |
528 return 'all' | 580 return 'all' |
529 # Note: this does *not* include the 'dir in self._parents' case from | 581 # Note: this does *not* include the 'dir in self._parents' case from |
530 # visitdir, that's handled below. | 582 # visitdir, that's handled below. |
533 dir in self._dirs or | 585 dir in self._dirs or |
534 any(parentdir in self._roots | 586 any(parentdir in self._roots |
535 for parentdir in util.finddirs(dir))): | 587 for parentdir in util.finddirs(dir))): |
536 return 'this' | 588 return 'this' |
537 | 589 |
538 ret = set() | |
539 if dir in self._parents: | 590 if dir in self._parents: |
540 # We add a '/' on to `dir` so that we don't return items that are | 591 return self._allparentschildren.get(dir) or set() |
541 # prefixed by `dir` but are actually siblings of `dir`. | 592 return set() |
542 suffixeddir = dir + '/' if dir != '.' else '' | |
543 # Look in all _roots, _dirs, and _parents for things that start with | |
544 # 'suffixeddir'. | |
545 for d in [q for q in | |
546 itertools.chain(self._roots, self._dirs, self._parents) if | |
547 q.startswith(suffixeddir)]: | |
548 # Don't emit '.' in the response for the root directory | |
549 if not suffixeddir and d == '.': | |
550 continue | |
551 | |
552 # We return the item name without the `suffixeddir` prefix or a | |
553 # slash suffix | |
554 d = d[len(suffixeddir):] | |
555 if '/' in d: | |
556 # This is a subdirectory-of-a-subdirectory, i.e. | |
557 # suffixeddir='foo/', d was 'foo/bar/baz' before removing | |
558 # 'foo/'. | |
559 d = d[:d.index('/')] | |
560 ret.add(d) | |
561 return ret | |
562 | 593 |
563 @encoding.strmethod | 594 @encoding.strmethod |
564 def __repr__(self): | 595 def __repr__(self): |
565 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats)) | 596 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats)) |
566 | 597 |
1236 p.extend(util.dirs(d)) | 1267 p.extend(util.dirs(d)) |
1237 p.extend(util.dirs(r)) | 1268 p.extend(util.dirs(r)) |
1238 # util.dirs() does not include the root directory, so add it manually | 1269 # util.dirs() does not include the root directory, so add it manually |
1239 p.append('.') | 1270 p.append('.') |
1240 | 1271 |
1272 # FIXME: all uses of this function convert these to sets, do so before | |
1273 # returning. | |
1274 # FIXME: all uses of this function do not need anything in 'roots' and | |
1275 # 'dirs' to also be in 'parents', consider removing them before returning. | |
1241 return r, d, p | 1276 return r, d, p |
1242 | 1277 |
1243 def _explicitfiles(kindpats): | 1278 def _explicitfiles(kindpats): |
1244 '''Returns the potential explicit filenames from the patterns. | 1279 '''Returns the potential explicit filenames from the patterns. |
1245 | 1280 |