comparison mercurial/match.py @ 45999:c4c7a6b61146

match: skip walking up the directory hierarchy if the number of pats are small Previously, we would receive a path like abc/def/ghi and "walk up" the directory hierarchy, checking abc/def, abc, and `b''` to see if they were in the set of prefixes that this matcher covered. We did this indiscriminately - we generated all of these paths even if the set of prefixes the matcher covered was completely empty, which is the case for a lot of repos at my company (the narrow matcher we use is usually non-recursive). This brings the time for a rebase in one of my repos from 12.20s to 10.87s. In this particular repo, this is entirely due to the `len(prefix_set) == 0` check, as I do not have any recursive patterns in the narrowspec. Differential Revision: https://phab.mercurial-scm.org/D9488
author Kyle Lippincott <spectral@google.com>
date Mon, 30 Nov 2020 12:30:58 -0800
parents 464539c305aa
children 1f0ed7e63c2a
comparison
equal deleted inserted replaced
45998:cb623dedb17c 45999:c4c7a6b61146
561 self.matchfn 561 self.matchfn
562 ) 562 )
563 return b'<predicatenmatcher pred=%s>' % s 563 return b'<predicatenmatcher pred=%s>' % s
564 564
565 565
566 def path_or_parents_in_set(path, prefix_set):
567 """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
568 l = len(prefix_set)
569 if l == 0:
570 return False
571 if path in prefix_set:
572 return True
573 # If there's more than 5 paths in prefix_set, it's *probably* quicker to
574 # "walk up" the directory hierarchy instead, with the assumption that most
575 # directory hierarchies are relatively shallow and hash lookup is cheap.
576 if l > 5:
577 return any(
578 parentdir in prefix_set for parentdir in pathutil.finddirs(path)
579 )
580
581 # FIXME: Ideally we'd never get to this point if this is the case - we'd
582 # recognize ourselves as an 'always' matcher and skip this.
583 if b'' in prefix_set:
584 return True
585
586 if pycompat.ispy3:
587 sl = ord(b'/')
588 else:
589 sl = '/'
590
591 # We already checked that path isn't in prefix_set exactly, so
592 # `path[len(pf)] should never raise IndexError.
593 return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set)
594
595
566 class patternmatcher(basematcher): 596 class patternmatcher(basematcher):
567 r"""Matches a set of (kind, pat, source) against a 'root' directory. 597 r"""Matches a set of (kind, pat, source) against a 'root' directory.
568 598
569 >>> kindpats = [ 599 >>> kindpats = [
570 ... (b're', br'.*\.c$', b''), 600 ... (b're', br'.*\.c$', b''),
609 639
610 def visitdir(self, dir): 640 def visitdir(self, dir):
611 if self._prefix and dir in self._fileset: 641 if self._prefix and dir in self._fileset:
612 return b'all' 642 return b'all'
613 return ( 643 return (
614 dir in self._fileset 644 dir in self._dirs
615 or dir in self._dirs 645 or path_or_parents_in_set(dir, self._fileset)
616 or any(
617 parentdir in self._fileset
618 for parentdir in pathutil.finddirs(dir)
619 )
620 ) 646 )
621 647
622 def visitchildrenset(self, dir): 648 def visitchildrenset(self, dir):
623 ret = self.visitdir(dir) 649 ret = self.visitdir(dir)
624 if ret is True: 650 if ret is True:
696 722
697 def visitdir(self, dir): 723 def visitdir(self, dir):
698 if self._prefix and dir in self._roots: 724 if self._prefix and dir in self._roots:
699 return b'all' 725 return b'all'
700 return ( 726 return (
701 dir in self._roots 727 dir in self._dirs
702 or dir in self._dirs
703 or dir in self._parents 728 or dir in self._parents
704 or any( 729 or path_or_parents_in_set(dir, self._roots)
705 parentdir in self._roots for parentdir in pathutil.finddirs(dir)
706 )
707 ) 730 )
708 731
709 @propertycache 732 @propertycache
710 def _allparentschildren(self): 733 def _allparentschildren(self):
711 # It may seem odd that we add dirs, roots, and parents, and then 734 # It may seem odd that we add dirs, roots, and parents, and then
724 return b'all' 747 return b'all'
725 # Note: this does *not* include the 'dir in self._parents' case from 748 # Note: this does *not* include the 'dir in self._parents' case from
726 # visitdir, that's handled below. 749 # visitdir, that's handled below.
727 if ( 750 if (
728 b'' in self._roots 751 b'' in self._roots
729 or dir in self._roots
730 or dir in self._dirs 752 or dir in self._dirs
731 or any( 753 or path_or_parents_in_set(dir, self._roots)
732 parentdir in self._roots for parentdir in pathutil.finddirs(dir)
733 )
734 ): 754 ):
735 return b'this' 755 return b'this'
736 756
737 if dir in self._parents: 757 if dir in self._parents:
738 return self._allparentschildren.get(dir) or set() 758 return self._allparentschildren.get(dir) or set()