match: make exactmatcher.visitchildrenset return file children as well
Previously, if we had an exactmatcher like ['foo.txt', 'a/bar.txt', 'a/b/c/baz.txt'], we'd
get back the following data:
'.': {'a'}
'a': {'b'}
'a/b': {'c'}
'a/b/c': 'this'
'a/b/c/d': set()
This was incorrect, since visitchildrenset explicitly says not to pay attention
to 'foo.txt' and 'a/bar.txt' by not returning them or 'this'. Given the near
impossibility of making visitchildrenset reliabbly produce only subdirectories,
a previous commit has made it documented and expected that visitchildrenset can
return a set containing both files and subdirectories to visit, instead of
implying/requiring that visitchildrenset() return 'this' if there are files to
visit. This makes the code for exactmatcher match this clarified documentation.
Differential Revision: https://phab.mercurial-scm.org/D4365
--- a/mercurial/match.py Thu Aug 23 18:04:15 2018 -0700
+++ b/mercurial/match.py Fri Aug 24 10:19:31 2018 -0700
@@ -587,23 +587,24 @@
return dir in self._dirs
def visitchildrenset(self, dir):
- if dir in self._dirs:
- candidates = self._dirs - {'.'}
- if dir != '.':
- d = dir + '/'
- candidates = set(c[len(d):] for c in candidates if
- c.startswith(d))
- # self._dirs includes all of the directories, recursively, so if
- # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
- # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
- # '/' in it, indicating a it's for a subdir-of-a-subdir; the
- # immediate subdir will be in there without a slash.
- ret = set(c for c in candidates if '/' not in c)
- # We need to emit 'this' for foo/bar, not set(), not {'baz.txt'}.
- if not ret:
- return 'this'
- return ret
- return set()
+ if not self._fileset or dir not in self._dirs:
+ return set()
+
+ candidates = self._fileset | self._dirs - {'.'}
+ if dir != '.':
+ d = dir + '/'
+ candidates = set(c[len(d):] for c in candidates if
+ c.startswith(d))
+ # self._dirs includes all of the directories, recursively, so if
+ # we're attempting to match foo/bar/baz.txt, it'll have '.', 'foo',
+ # 'foo/bar' in it. Thus we can safely ignore a candidate that has a
+ # '/' in it, indicating a it's for a subdir-of-a-subdir; the
+ # immediate subdir will be in there without a slash.
+ ret = {c for c in candidates if '/' not in c}
+ # We really do not expect ret to be empty, since that would imply that
+ # there's something in _dirs that didn't have a file in _fileset.
+ assert ret
+ return ret
def isexact(self):
return True
--- a/tests/test-match.py Thu Aug 23 18:04:15 2018 -0700
+++ b/tests/test-match.py Fri Aug 24 10:19:31 2018 -0700
@@ -202,11 +202,27 @@
assert isinstance(m, matchmod.exactmatcher)
self.assertEqual(m.visitchildrenset(b'.'), {b'dir'})
self.assertEqual(m.visitchildrenset(b'dir'), {b'subdir'})
- self.assertEqual(m.visitchildrenset(b'dir/subdir'), b'this')
+ self.assertEqual(m.visitchildrenset(b'dir/subdir'), {b'foo.txt'})
self.assertEqual(m.visitchildrenset(b'dir/subdir/x'), set())
self.assertEqual(m.visitchildrenset(b'dir/subdir/foo.txt'), set())
self.assertEqual(m.visitchildrenset(b'folder'), set())
+ def testVisitchildrensetFilesAndDirs(self):
+ m = matchmod.match(b'x', b'', patterns=[b'rootfile.txt',
+ b'a/file1.txt',
+ b'a/b/file2.txt',
+ # no file in a/b/c
+ b'a/b/c/d/file4.txt'],
+ exact=True)
+ assert isinstance(m, matchmod.exactmatcher)
+ self.assertEqual(m.visitchildrenset(b'.'), {b'a', b'rootfile.txt'})
+ self.assertEqual(m.visitchildrenset(b'a'), {b'b', b'file1.txt'})
+ self.assertEqual(m.visitchildrenset(b'a/b'), {b'c', b'file2.txt'})
+ self.assertEqual(m.visitchildrenset(b'a/b/c'), {b'd'})
+ self.assertEqual(m.visitchildrenset(b'a/b/c/d'), {b'file4.txt'})
+ self.assertEqual(m.visitchildrenset(b'a/b/c/d/e'), set())
+ self.assertEqual(m.visitchildrenset(b'folder'), set())
+
class DifferenceMatcherTests(unittest.TestCase):
def testVisitdirM2always(self):