# HG changeset patch # User Drew Gottlieb # Date 1428342713 25200 # Node ID 36872036169b27885abb2e464a637e15e9b60de1 # Parent 21e1ece30f8c35991d37113ec7c1b72001210ee8 treemanifest: further optimize treemanifest.matches() The matches function was previously traversing all submanifests to look for matching files, even though it was possible to know if a submanifest won't contain any matches. This change adds a visitdir function on the match object to decide quickly if a directory should be visited when traversing. The function also decides if _all_ subdirectories should be traversed. Adding this logic as methods on the match object also makes the logic modifiable by extensions, such as largefiles. An example of a command this speeds up is running hg status --rev .^ python/ on the Mozilla repo with the treemanifest experiment enabled. It goes from 2.03s to 1.85s. More improvements to speed from this change will happen when treemanifests are lazily loaded. Because a flat manifest is still loaded and then converted into treemanifests, speed improvements are limited. This change has no negative effect on speed. For a worst-case example, this command is not negatively impacted: hg status --rev .^ 'relglob:*.js' on the Mozilla repo. It goes from 2.83s to 2.82s. diff -r 21e1ece30f8c -r 36872036169b mercurial/manifest.py --- a/mercurial/manifest.py Mon Apr 06 14:36:08 2015 -0700 +++ b/mercurial/manifest.py Mon Apr 06 10:51:53 2015 -0700 @@ -581,11 +581,19 @@ return self._matches(match) - def _matches(self, match): + def _matches(self, match, alldirs=False): '''recursively generate a new manifest filtered by the match argument. - ''' + + Will visit all subdirectories if alldirs is True, otherwise it will + only visit subdirectories for which match.visitdir is True.''' ret = treemanifest(self._dir) + if not alldirs: + # substring to strip trailing slash + visit = match.visitdir(self._dir[:-1] or '.') + if not visit: + return ret + alldirs = (visit == 'all') for fn in self._files: fullp = self._subpath(fn) @@ -596,7 +604,7 @@ ret._flags[fn] = self._flags[fn] for dir, subm in self._dirs.iteritems(): - m = subm._matches(match) + m = subm._matches(match, alldirs) if not m._isempty(): ret._dirs[dir] = m diff -r 21e1ece30f8c -r 36872036169b mercurial/match.py --- a/mercurial/match.py Mon Apr 06 14:36:08 2015 -0700 +++ b/mercurial/match.py Mon Apr 06 10:51:53 2015 -0700 @@ -9,6 +9,8 @@ import util, pathutil from i18n import _ +propertycache = util.propertycache + def _rematcher(regex): '''compile the regexp with the best available regexp engine and return a matcher function''' @@ -157,6 +159,20 @@ else: optimal roots''' return self._files + @propertycache + def _dirs(self): + return set(util.dirs(self._fmap)) | set(['.']) + + def visitdir(self, dir): + '''Helps while traversing a directory tree. Returns the string 'all' if + the given directory and all subdirectories should be visited. Otherwise + returns True or False indicating whether the given directory should be + visited. If 'all' is returned, calling this method on a subdirectory + gives an undefined result.''' + if not self._fmap or self.exact(dir): + return 'all' + return dir in self._dirs + def exact(self, f): '''Returns True if f is in .files().''' return f in self._fmap