Mercurial > hg

--- a/mercurial/match.py	Tue Mar 30 13:05:22 2021 -0700
+++ b/mercurial/match.py	Wed Mar 31 12:46:54 2021 -0700
@@ -7,6 +7,7 @@

 from __future__ import absolute_import, print_function

+import bisect
 import copy
 import itertools
 import os
@@ -798,14 +799,38 @@
     def visitdir(self, dir):
         return dir in self._dirs

+    @propertycache
+    def _visitchildrenset_candidates(self):
+        """A memoized set of candidates for visitchildrenset."""
+        return self._fileset | self._dirs - {b''}
+
+    @propertycache
+    def _sorted_visitchildrenset_candidates(self):
+        """A memoized sorted list of candidates for visitchildrenset."""
+        return sorted(self._visitchildrenset_candidates)
+
     def visitchildrenset(self, dir):
         if not self._fileset or dir not in self._dirs:
             return set()

-        candidates = self._fileset | self._dirs - {b''}
-        if dir != b'':
+        if dir == b'':
+            candidates = self._visitchildrenset_candidates
+        else:
+            candidates = self._sorted_visitchildrenset_candidates
             d = dir + b'/'
-            candidates = {c[len(d) :] for c in candidates if c.startswith(d)}
+            # Use bisect to find the first element potentially starting with d
+            # (i.e. >= d). This should always find at least one element (we'll
+            # assert later if this is not the case).
+            first = bisect.bisect_left(candidates, d)
+            # We need a representation of the first element that is > d that
+            # does not start with d, so since we added a `/` on the end of dir,
+            # we'll add whatever comes after slash (we could probably assume
+            # that `0` is after `/`, but let's not) to the end of dir instead.
+            dnext = dir + encoding.strtolocal(chr(ord(b'/') + 1))
+            # Use bisect to find the first element >= d_next
+            last = bisect.bisect_left(candidates, dnext, lo=first)
+            dlen = len(d)
+            candidates = {c[dlen:] for c in candidates[first:last]}
         # self._dirs includes all of the directories, recursively, so if
         # we're attempting to match foo/bar/baz.txt, it'll have '', 'foo',
         # 'foo/bar' in it. Thus we can safely ignore a candidate that has a