comparison mercurial/match.py @ 39477:35ecaa999a12

match: improve includematcher.visitchildrenset to be much faster and cached This improves the speed of visitchildrenset considerably, especially when there are complicated matchers involved that may have many entries in _dirs or _parents. Unfortunately the benchmark isn't easily upstreamed due to its reliance on https://github.com/vstinner/perf (primarily due to the conflict when importing it if I were to contribute the benchmark as contrib/matcherbenchmarks.py) instead of asv or some other perf measurement system. To describe the benchmark briefly: I generated an includematcher of either 5 or 3500 "rootfilesin:prefix1/prefix2/prefix3/<randomsubdirs, 1-8 levels deep>" items in the 'setup' function, and then called `im.visitchildrenset('prefix1/prefix2')` in the 'stmt' function in perf.timeit. For the set of 5: - before: 15.3 us +- 2.9 us - after: 1.59 us +- 0.02 us For the set of 3500: - before: 3.90 ms +- 0.10 ms - after: 3.15 us +- 0.09 us (note the m->u change) Differential Revision: https://phab.mercurial-scm.org/D4351
author Kyle Lippincott <spectral@google.com>
date Fri, 17 Aug 2018 19:18:53 -0700
parents c9a3f7f5c023
children 19ed212de2d1
comparison
equal deleted inserted replaced
39476:bc3b99d5627e 39477:35ecaa999a12
494 494
495 @encoding.strmethod 495 @encoding.strmethod
496 def __repr__(self): 496 def __repr__(self):
497 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats)) 497 return ('<patternmatcher patterns=%r>' % pycompat.bytestr(self._pats))
498 498
499 # This is basically a reimplementation of util.dirs that stores the children
500 # instead of just a count of them, plus a small optional optimization to avoid
501 # some directories we don't need.
502 class _dirchildren(object):
503 def __init__(self, paths, onlyinclude=None):
504 self._dirs = {}
505 self._onlyinclude = onlyinclude or []
506 addpath = self.addpath
507 for f in paths:
508 addpath(f)
509
510 def addpath(self, path):
511 if path == '.':
512 return
513 dirs = self._dirs
514 findsplitdirs = _dirchildren._findsplitdirs
515 for d, b in findsplitdirs(path):
516 if d not in self._onlyinclude:
517 continue
518 dirs.setdefault(d, set()).add(b)
519
520 @staticmethod
521 def _findsplitdirs(path):
522 # yields (dirname, basename) tuples, walking back to the root. This is
523 # very similar to util.finddirs, except:
524 # - produces a (dirname, basename) tuple, not just 'dirname'
525 # - includes root dir
526 # Unlike manifest._splittopdir, this does not suffix `dirname` with a
527 # slash, and produces '.' for the root instead of ''.
528 oldpos = len(path)
529 pos = path.rfind('/')
530 while pos != -1:
531 yield path[:pos], path[pos + 1:oldpos]
532 oldpos = pos
533 pos = path.rfind('/', 0, pos)
534 yield '.', path[:oldpos]
535
536 def get(self, path):
537 return self._dirs.get(path, set())
538
499 class includematcher(basematcher): 539 class includematcher(basematcher):
500 540
501 def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None): 541 def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None):
502 super(includematcher, self).__init__(root, cwd, badfn) 542 super(includematcher, self).__init__(root, cwd, badfn)
503 543
521 dir in self._dirs or 561 dir in self._dirs or
522 dir in self._parents or 562 dir in self._parents or
523 any(parentdir in self._roots 563 any(parentdir in self._roots
524 for parentdir in util.finddirs(dir))) 564 for parentdir in util.finddirs(dir)))
525 565
566 @propertycache
567 def _allparentschildren(self):
568 # It may seem odd that we add dirs, roots, and parents, and then
569 # restrict to only parents. This is to catch the case of:
570 # dirs = ['foo/bar']
571 # parents = ['foo']
572 # if we asked for the children of 'foo', but had only added
573 # self._parents, we wouldn't be able to respond ['bar'].
574 return _dirchildren(
575 itertools.chain(self._dirs, self._roots, self._parents),
576 onlyinclude=self._parents)
577
526 def visitchildrenset(self, dir): 578 def visitchildrenset(self, dir):
527 if self._prefix and dir in self._roots: 579 if self._prefix and dir in self._roots:
528 return 'all' 580 return 'all'
529 # Note: this does *not* include the 'dir in self._parents' case from 581 # Note: this does *not* include the 'dir in self._parents' case from
530 # visitdir, that's handled below. 582 # visitdir, that's handled below.
533 dir in self._dirs or 585 dir in self._dirs or
534 any(parentdir in self._roots 586 any(parentdir in self._roots
535 for parentdir in util.finddirs(dir))): 587 for parentdir in util.finddirs(dir))):
536 return 'this' 588 return 'this'
537 589
538 ret = set()
539 if dir in self._parents: 590 if dir in self._parents:
540 # We add a '/' on to `dir` so that we don't return items that are 591 return self._allparentschildren.get(dir) or set()
541 # prefixed by `dir` but are actually siblings of `dir`. 592 return set()
542 suffixeddir = dir + '/' if dir != '.' else ''
543 # Look in all _roots, _dirs, and _parents for things that start with
544 # 'suffixeddir'.
545 for d in [q for q in
546 itertools.chain(self._roots, self._dirs, self._parents) if
547 q.startswith(suffixeddir)]:
548 # Don't emit '.' in the response for the root directory
549 if not suffixeddir and d == '.':
550 continue
551
552 # We return the item name without the `suffixeddir` prefix or a
553 # slash suffix
554 d = d[len(suffixeddir):]
555 if '/' in d:
556 # This is a subdirectory-of-a-subdirectory, i.e.
557 # suffixeddir='foo/', d was 'foo/bar/baz' before removing
558 # 'foo/'.
559 d = d[:d.index('/')]
560 ret.add(d)
561 return ret
562 593
563 @encoding.strmethod 594 @encoding.strmethod
564 def __repr__(self): 595 def __repr__(self):
565 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats)) 596 return ('<includematcher includes=%r>' % pycompat.bytestr(self._pats))
566 597
1236 p.extend(util.dirs(d)) 1267 p.extend(util.dirs(d))
1237 p.extend(util.dirs(r)) 1268 p.extend(util.dirs(r))
1238 # util.dirs() does not include the root directory, so add it manually 1269 # util.dirs() does not include the root directory, so add it manually
1239 p.append('.') 1270 p.append('.')
1240 1271
1272 # FIXME: all uses of this function convert these to sets, do so before
1273 # returning.
1274 # FIXME: all uses of this function do not need anything in 'roots' and
1275 # 'dirs' to also be in 'parents', consider removing them before returning.
1241 return r, d, p 1276 return r, d, p
1242 1277
1243 def _explicitfiles(kindpats): 1278 def _explicitfiles(kindpats):
1244 '''Returns the potential explicit filenames from the patterns. 1279 '''Returns the potential explicit filenames from the patterns.
1245 1280