# HG changeset patch # User Kyle Lippincott # Date 1534558733 25200 # Node ID 35ecaa999a12b2b2a02f15f108a0552e0937bcfb # Parent bc3b99d5627ed189797155fbeee458ff398b90ff match: improve includematcher.visitchildrenset to be much faster and cached This improves the speed of visitchildrenset considerably, especially when there are complicated matchers involved that may have many entries in _dirs or _parents. Unfortunately the benchmark isn't easily upstreamed due to its reliance on https://github.com/vstinner/perf (primarily due to the conflict when importing it if I were to contribute the benchmark as contrib/matcherbenchmarks.py) instead of asv or some other perf measurement system. To describe the benchmark briefly: I generated an includematcher of either 5 or 3500 "rootfilesin:prefix1/prefix2/prefix3/" items in the 'setup' function, and then called `im.visitchildrenset('prefix1/prefix2')` in the 'stmt' function in perf.timeit. For the set of 5: - before: 15.3 us +- 2.9 us - after: 1.59 us +- 0.02 us For the set of 3500: - before: 3.90 ms +- 0.10 ms - after: 3.15 us +- 0.09 us (note the m->u change) Differential Revision: https://phab.mercurial-scm.org/D4351 diff -r bc3b99d5627e -r 35ecaa999a12 mercurial/match.py --- a/mercurial/match.py Thu Sep 06 03:21:05 2018 +0530 +++ b/mercurial/match.py Fri Aug 17 19:18:53 2018 -0700 @@ -496,6 +496,46 @@ def __repr__(self): return ('' % pycompat.bytestr(self._pats)) +# This is basically a reimplementation of util.dirs that stores the children +# instead of just a count of them, plus a small optional optimization to avoid +# some directories we don't need. +class _dirchildren(object): + def __init__(self, paths, onlyinclude=None): + self._dirs = {} + self._onlyinclude = onlyinclude or [] + addpath = self.addpath + for f in paths: + addpath(f) + + def addpath(self, path): + if path == '.': + return + dirs = self._dirs + findsplitdirs = _dirchildren._findsplitdirs + for d, b in findsplitdirs(path): + if d not in self._onlyinclude: + continue + dirs.setdefault(d, set()).add(b) + + @staticmethod + def _findsplitdirs(path): + # yields (dirname, basename) tuples, walking back to the root. This is + # very similar to util.finddirs, except: + # - produces a (dirname, basename) tuple, not just 'dirname' + # - includes root dir + # Unlike manifest._splittopdir, this does not suffix `dirname` with a + # slash, and produces '.' for the root instead of ''. + oldpos = len(path) + pos = path.rfind('/') + while pos != -1: + yield path[:pos], path[pos + 1:oldpos] + oldpos = pos + pos = path.rfind('/', 0, pos) + yield '.', path[:oldpos] + + def get(self, path): + return self._dirs.get(path, set()) + class includematcher(basematcher): def __init__(self, root, cwd, kindpats, listsubrepos=False, badfn=None): @@ -523,6 +563,18 @@ any(parentdir in self._roots for parentdir in util.finddirs(dir))) + @propertycache + def _allparentschildren(self): + # It may seem odd that we add dirs, roots, and parents, and then + # restrict to only parents. This is to catch the case of: + # dirs = ['foo/bar'] + # parents = ['foo'] + # if we asked for the children of 'foo', but had only added + # self._parents, we wouldn't be able to respond ['bar']. + return _dirchildren( + itertools.chain(self._dirs, self._roots, self._parents), + onlyinclude=self._parents) + def visitchildrenset(self, dir): if self._prefix and dir in self._roots: return 'all' @@ -535,30 +587,9 @@ for parentdir in util.finddirs(dir))): return 'this' - ret = set() if dir in self._parents: - # We add a '/' on to `dir` so that we don't return items that are - # prefixed by `dir` but are actually siblings of `dir`. - suffixeddir = dir + '/' if dir != '.' else '' - # Look in all _roots, _dirs, and _parents for things that start with - # 'suffixeddir'. - for d in [q for q in - itertools.chain(self._roots, self._dirs, self._parents) if - q.startswith(suffixeddir)]: - # Don't emit '.' in the response for the root directory - if not suffixeddir and d == '.': - continue - - # We return the item name without the `suffixeddir` prefix or a - # slash suffix - d = d[len(suffixeddir):] - if '/' in d: - # This is a subdirectory-of-a-subdirectory, i.e. - # suffixeddir='foo/', d was 'foo/bar/baz' before removing - # 'foo/'. - d = d[:d.index('/')] - ret.add(d) - return ret + return self._allparentschildren.get(dir) or set() + return set() @encoding.strmethod def __repr__(self): @@ -1238,6 +1269,10 @@ # util.dirs() does not include the root directory, so add it manually p.append('.') + # FIXME: all uses of this function convert these to sets, do so before + # returning. + # FIXME: all uses of this function do not need anything in 'roots' and + # 'dirs' to also be in 'parents', consider removing them before returning. return r, d, p def _explicitfiles(kindpats):