match: skip walking up the directory hierarchy if the number of pats are small
Previously, we would receive a path like abc/def/ghi and "walk up" the directory
hierarchy, checking abc/def, abc, and `b''` to see if they were in the set of
prefixes that this matcher covered. We did this indiscriminately - we generated
all of these paths even if the set of prefixes the matcher covered was
completely empty, which is the case for a lot of repos at my company (the narrow
matcher we use is usually non-recursive).
This brings the time for a rebase in one of my repos from 12.20s to 10.87s. In
this particular repo, this is entirely due to the `len(prefix_set) == 0` check,
as I do not have any recursive patterns in the narrowspec.
Differential Revision: https://phab.mercurial-scm.org/D9488
--- a/mercurial/match.py Tue Dec 01 22:19:01 2020 +0100
+++ b/mercurial/match.py Mon Nov 30 12:30:58 2020 -0800
@@ -563,6 +563,36 @@
return b'<predicatenmatcher pred=%s>' % s
+def path_or_parents_in_set(path, prefix_set):
+ """Returns True if `path` (or any parent of `path`) is in `prefix_set`."""
+ l = len(prefix_set)
+ if l == 0:
+ return False
+ if path in prefix_set:
+ return True
+ # If there's more than 5 paths in prefix_set, it's *probably* quicker to
+ # "walk up" the directory hierarchy instead, with the assumption that most
+ # directory hierarchies are relatively shallow and hash lookup is cheap.
+ if l > 5:
+ return any(
+ parentdir in prefix_set for parentdir in pathutil.finddirs(path)
+ )
+
+ # FIXME: Ideally we'd never get to this point if this is the case - we'd
+ # recognize ourselves as an 'always' matcher and skip this.
+ if b'' in prefix_set:
+ return True
+
+ if pycompat.ispy3:
+ sl = ord(b'/')
+ else:
+ sl = '/'
+
+ # We already checked that path isn't in prefix_set exactly, so
+ # `path[len(pf)] should never raise IndexError.
+ return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set)
+
+
class patternmatcher(basematcher):
r"""Matches a set of (kind, pat, source) against a 'root' directory.
@@ -611,12 +641,8 @@
if self._prefix and dir in self._fileset:
return b'all'
return (
- dir in self._fileset
- or dir in self._dirs
- or any(
- parentdir in self._fileset
- for parentdir in pathutil.finddirs(dir)
- )
+ dir in self._dirs
+ or path_or_parents_in_set(dir, self._fileset)
)
def visitchildrenset(self, dir):
@@ -698,12 +724,9 @@
if self._prefix and dir in self._roots:
return b'all'
return (
- dir in self._roots
- or dir in self._dirs
+ dir in self._dirs
or dir in self._parents
- or any(
- parentdir in self._roots for parentdir in pathutil.finddirs(dir)
- )
+ or path_or_parents_in_set(dir, self._roots)
)
@propertycache
@@ -726,11 +749,8 @@
# visitdir, that's handled below.
if (
b'' in self._roots
- or dir in self._roots
or dir in self._dirs
- or any(
- parentdir in self._roots for parentdir in pathutil.finddirs(dir)
- )
+ or path_or_parents_in_set(dir, self._roots)
):
return b'this'