Mercurial > hg
comparison mercurial/match.py @ 45999:c4c7a6b61146
match: skip walking up the directory hierarchy if the number of pats are small
Previously, we would receive a path like abc/def/ghi and "walk up" the directory
hierarchy, checking abc/def, abc, and `b''` to see if they were in the set of
prefixes that this matcher covered. We did this indiscriminately - we generated
all of these paths even if the set of prefixes the matcher covered was
completely empty, which is the case for a lot of repos at my company (the narrow
matcher we use is usually non-recursive).
This brings the time for a rebase in one of my repos from 12.20s to 10.87s. In
this particular repo, this is entirely due to the `len(prefix_set) == 0` check,
as I do not have any recursive patterns in the narrowspec.
Differential Revision: https://phab.mercurial-scm.org/D9488
author | Kyle Lippincott <spectral@google.com> |
---|---|
date | Mon, 30 Nov 2020 12:30:58 -0800 |
parents | 464539c305aa |
children | 1f0ed7e63c2a |
comparison
equal
deleted
inserted
replaced
45998:cb623dedb17c | 45999:c4c7a6b61146 |
---|---|
561 self.matchfn | 561 self.matchfn |
562 ) | 562 ) |
563 return b'<predicatenmatcher pred=%s>' % s | 563 return b'<predicatenmatcher pred=%s>' % s |
564 | 564 |
565 | 565 |
566 def path_or_parents_in_set(path, prefix_set): | |
567 """Returns True if `path` (or any parent of `path`) is in `prefix_set`.""" | |
568 l = len(prefix_set) | |
569 if l == 0: | |
570 return False | |
571 if path in prefix_set: | |
572 return True | |
573 # If there's more than 5 paths in prefix_set, it's *probably* quicker to | |
574 # "walk up" the directory hierarchy instead, with the assumption that most | |
575 # directory hierarchies are relatively shallow and hash lookup is cheap. | |
576 if l > 5: | |
577 return any( | |
578 parentdir in prefix_set for parentdir in pathutil.finddirs(path) | |
579 ) | |
580 | |
581 # FIXME: Ideally we'd never get to this point if this is the case - we'd | |
582 # recognize ourselves as an 'always' matcher and skip this. | |
583 if b'' in prefix_set: | |
584 return True | |
585 | |
586 if pycompat.ispy3: | |
587 sl = ord(b'/') | |
588 else: | |
589 sl = '/' | |
590 | |
591 # We already checked that path isn't in prefix_set exactly, so | |
592 # `path[len(pf)] should never raise IndexError. | |
593 return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set) | |
594 | |
595 | |
566 class patternmatcher(basematcher): | 596 class patternmatcher(basematcher): |
567 r"""Matches a set of (kind, pat, source) against a 'root' directory. | 597 r"""Matches a set of (kind, pat, source) against a 'root' directory. |
568 | 598 |
569 >>> kindpats = [ | 599 >>> kindpats = [ |
570 ... (b're', br'.*\.c$', b''), | 600 ... (b're', br'.*\.c$', b''), |
609 | 639 |
610 def visitdir(self, dir): | 640 def visitdir(self, dir): |
611 if self._prefix and dir in self._fileset: | 641 if self._prefix and dir in self._fileset: |
612 return b'all' | 642 return b'all' |
613 return ( | 643 return ( |
614 dir in self._fileset | 644 dir in self._dirs |
615 or dir in self._dirs | 645 or path_or_parents_in_set(dir, self._fileset) |
616 or any( | |
617 parentdir in self._fileset | |
618 for parentdir in pathutil.finddirs(dir) | |
619 ) | |
620 ) | 646 ) |
621 | 647 |
622 def visitchildrenset(self, dir): | 648 def visitchildrenset(self, dir): |
623 ret = self.visitdir(dir) | 649 ret = self.visitdir(dir) |
624 if ret is True: | 650 if ret is True: |
696 | 722 |
697 def visitdir(self, dir): | 723 def visitdir(self, dir): |
698 if self._prefix and dir in self._roots: | 724 if self._prefix and dir in self._roots: |
699 return b'all' | 725 return b'all' |
700 return ( | 726 return ( |
701 dir in self._roots | 727 dir in self._dirs |
702 or dir in self._dirs | |
703 or dir in self._parents | 728 or dir in self._parents |
704 or any( | 729 or path_or_parents_in_set(dir, self._roots) |
705 parentdir in self._roots for parentdir in pathutil.finddirs(dir) | |
706 ) | |
707 ) | 730 ) |
708 | 731 |
709 @propertycache | 732 @propertycache |
710 def _allparentschildren(self): | 733 def _allparentschildren(self): |
711 # It may seem odd that we add dirs, roots, and parents, and then | 734 # It may seem odd that we add dirs, roots, and parents, and then |
724 return b'all' | 747 return b'all' |
725 # Note: this does *not* include the 'dir in self._parents' case from | 748 # Note: this does *not* include the 'dir in self._parents' case from |
726 # visitdir, that's handled below. | 749 # visitdir, that's handled below. |
727 if ( | 750 if ( |
728 b'' in self._roots | 751 b'' in self._roots |
729 or dir in self._roots | |
730 or dir in self._dirs | 752 or dir in self._dirs |
731 or any( | 753 or path_or_parents_in_set(dir, self._roots) |
732 parentdir in self._roots for parentdir in pathutil.finddirs(dir) | |
733 ) | |
734 ): | 754 ): |
735 return b'this' | 755 return b'this' |
736 | 756 |
737 if dir in self._parents: | 757 if dir in self._parents: |
738 return self._allparentschildren.get(dir) or set() | 758 return self._allparentschildren.get(dir) or set() |