# HG changeset patch # User Kyle Lippincott # Date 1606768258 28800 # Node ID c4c7a6b611467725d582bb2836bd38aabda71953 # Parent cb623dedb17c61895a6b4218774f4d7e11a023a8 match: skip walking up the directory hierarchy if the number of pats are small Previously, we would receive a path like abc/def/ghi and "walk up" the directory hierarchy, checking abc/def, abc, and `b''` to see if they were in the set of prefixes that this matcher covered. We did this indiscriminately - we generated all of these paths even if the set of prefixes the matcher covered was completely empty, which is the case for a lot of repos at my company (the narrow matcher we use is usually non-recursive). This brings the time for a rebase in one of my repos from 12.20s to 10.87s. In this particular repo, this is entirely due to the `len(prefix_set) == 0` check, as I do not have any recursive patterns in the narrowspec. Differential Revision: https://phab.mercurial-scm.org/D9488 diff -r cb623dedb17c -r c4c7a6b61146 mercurial/match.py --- a/mercurial/match.py Tue Dec 01 22:19:01 2020 +0100 +++ b/mercurial/match.py Mon Nov 30 12:30:58 2020 -0800 @@ -563,6 +563,36 @@ return b'' % s +def path_or_parents_in_set(path, prefix_set): + """Returns True if `path` (or any parent of `path`) is in `prefix_set`.""" + l = len(prefix_set) + if l == 0: + return False + if path in prefix_set: + return True + # If there's more than 5 paths in prefix_set, it's *probably* quicker to + # "walk up" the directory hierarchy instead, with the assumption that most + # directory hierarchies are relatively shallow and hash lookup is cheap. + if l > 5: + return any( + parentdir in prefix_set for parentdir in pathutil.finddirs(path) + ) + + # FIXME: Ideally we'd never get to this point if this is the case - we'd + # recognize ourselves as an 'always' matcher and skip this. + if b'' in prefix_set: + return True + + if pycompat.ispy3: + sl = ord(b'/') + else: + sl = '/' + + # We already checked that path isn't in prefix_set exactly, so + # `path[len(pf)] should never raise IndexError. + return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set) + + class patternmatcher(basematcher): r"""Matches a set of (kind, pat, source) against a 'root' directory. @@ -611,12 +641,8 @@ if self._prefix and dir in self._fileset: return b'all' return ( - dir in self._fileset - or dir in self._dirs - or any( - parentdir in self._fileset - for parentdir in pathutil.finddirs(dir) - ) + dir in self._dirs + or path_or_parents_in_set(dir, self._fileset) ) def visitchildrenset(self, dir): @@ -698,12 +724,9 @@ if self._prefix and dir in self._roots: return b'all' return ( - dir in self._roots - or dir in self._dirs + dir in self._dirs or dir in self._parents - or any( - parentdir in self._roots for parentdir in pathutil.finddirs(dir) - ) + or path_or_parents_in_set(dir, self._roots) ) @propertycache @@ -726,11 +749,8 @@ # visitdir, that's handled below. if ( b'' in self._roots - or dir in self._roots or dir in self._dirs - or any( - parentdir in self._roots for parentdir in pathutil.finddirs(dir) - ) + or path_or_parents_in_set(dir, self._roots) ): return b'this'