Mercurial > hg
changeset 45999:c4c7a6b61146
match: skip walking up the directory hierarchy if the number of pats are small
Previously, we would receive a path like abc/def/ghi and "walk up" the directory
hierarchy, checking abc/def, abc, and `b''` to see if they were in the set of
prefixes that this matcher covered. We did this indiscriminately - we generated
all of these paths even if the set of prefixes the matcher covered was
completely empty, which is the case for a lot of repos at my company (the narrow
matcher we use is usually non-recursive).
This brings the time for a rebase in one of my repos from 12.20s to 10.87s. In
this particular repo, this is entirely due to the `len(prefix_set) == 0` check,
as I do not have any recursive patterns in the narrowspec.
Differential Revision: https://phab.mercurial-scm.org/D9488
author | Kyle Lippincott <spectral@google.com> |
---|---|
date | Mon, 30 Nov 2020 12:30:58 -0800 |
parents | cb623dedb17c |
children | c1bb02738f96 |
files | mercurial/match.py |
diffstat | 1 files changed, 35 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/match.py Tue Dec 01 22:19:01 2020 +0100 +++ b/mercurial/match.py Mon Nov 30 12:30:58 2020 -0800 @@ -563,6 +563,36 @@ return b'<predicatenmatcher pred=%s>' % s +def path_or_parents_in_set(path, prefix_set): + """Returns True if `path` (or any parent of `path`) is in `prefix_set`.""" + l = len(prefix_set) + if l == 0: + return False + if path in prefix_set: + return True + # If there's more than 5 paths in prefix_set, it's *probably* quicker to + # "walk up" the directory hierarchy instead, with the assumption that most + # directory hierarchies are relatively shallow and hash lookup is cheap. + if l > 5: + return any( + parentdir in prefix_set for parentdir in pathutil.finddirs(path) + ) + + # FIXME: Ideally we'd never get to this point if this is the case - we'd + # recognize ourselves as an 'always' matcher and skip this. + if b'' in prefix_set: + return True + + if pycompat.ispy3: + sl = ord(b'/') + else: + sl = '/' + + # We already checked that path isn't in prefix_set exactly, so + # `path[len(pf)] should never raise IndexError. + return any(path.startswith(pf) and path[len(pf)] == sl for pf in prefix_set) + + class patternmatcher(basematcher): r"""Matches a set of (kind, pat, source) against a 'root' directory. @@ -611,12 +641,8 @@ if self._prefix and dir in self._fileset: return b'all' return ( - dir in self._fileset - or dir in self._dirs - or any( - parentdir in self._fileset - for parentdir in pathutil.finddirs(dir) - ) + dir in self._dirs + or path_or_parents_in_set(dir, self._fileset) ) def visitchildrenset(self, dir): @@ -698,12 +724,9 @@ if self._prefix and dir in self._roots: return b'all' return ( - dir in self._roots - or dir in self._dirs + dir in self._dirs or dir in self._parents - or any( - parentdir in self._roots for parentdir in pathutil.finddirs(dir) - ) + or path_or_parents_in_set(dir, self._roots) ) @propertycache @@ -726,11 +749,8 @@ # visitdir, that's handled below. if ( b'' in self._roots - or dir in self._roots or dir in self._dirs - or any( - parentdir in self._roots for parentdir in pathutil.finddirs(dir) - ) + or path_or_parents_in_set(dir, self._roots) ): return b'this'