rust-matchers: implement `visit_children_set` for `FileMatcher`
As per the removed inline comment, this will become useful in a future patch
in this series as the `IncludeMatcher` is introduced.
Differential Revision: https://phab.mercurial-scm.org/D7914
// matchers.rs
//
// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
//! Structs and types for matching files and directories.
use crate::{utils::hg_path::HgPath, DirsMultiset, DirstateMapError};
use std::collections::HashSet;
use std::iter::FromIterator;
use std::ops::Deref;
#[derive(Debug, PartialEq)]
pub enum VisitChildrenSet<'a> {
/// Don't visit anything
Empty,
/// Only visit this directory
This,
/// Visit this directory and these subdirectories
/// TODO Should we implement a `NonEmptyHashSet`?
Set(HashSet<&'a HgPath>),
/// Visit this directory and all subdirectories
Recursive,
}
pub trait Matcher {
/// Explicitly listed files
fn file_set(&self) -> Option<&HashSet<&HgPath>>;
/// Returns whether `filename` is in `file_set`
fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool;
/// Returns whether `filename` is matched by this matcher
fn matches(&self, filename: impl AsRef<HgPath>) -> bool;
/// Decides whether a directory should be visited based on whether it
/// has potential matches in it or one of its subdirectories, and
/// potentially lists which subdirectories of that directory should be
/// visited. This is based on the match's primary, included, and excluded
/// patterns.
///
/// # Example
///
/// Assume matchers `['path:foo/bar', 'rootfilesin:qux']`, we would
/// return the following values (assuming the implementation of
/// visit_children_set is capable of recognizing this; some implementations
/// are not).
///
/// ```text
/// ```ignore
/// '' -> {'foo', 'qux'}
/// 'baz' -> set()
/// 'foo' -> {'bar'}
/// // Ideally this would be `Recursive`, but since the prefix nature of
/// // matchers is applied to the entire matcher, we have to downgrade this
/// // to `This` due to the (yet to be implemented in Rust) non-prefix
/// // `RootFilesIn'-kind matcher being mixed in.
/// 'foo/bar' -> 'this'
/// 'qux' -> 'this'
/// ```
/// # Important
///
/// Most matchers do not know if they're representing files or
/// directories. They see `['path:dir/f']` and don't know whether `f` is a
/// file or a directory, so `visit_children_set('dir')` for most matchers
/// will return `HashSet{ HgPath { "f" } }`, but if the matcher knows it's
/// a file (like the yet to be implemented in Rust `ExactMatcher` does),
/// it may return `VisitChildrenSet::This`.
/// Do not rely on the return being a `HashSet` indicating that there are
/// no files in this dir to investigate (or equivalently that if there are
/// files to investigate in 'dir' that it will always return
/// `VisitChildrenSet::This`).
fn visit_children_set(
&self,
directory: impl AsRef<HgPath>,
) -> VisitChildrenSet;
/// Matcher will match everything and `files_set()` will be empty:
/// optimization might be possible.
fn matches_everything(&self) -> bool;
/// Matcher will match exactly the files in `files_set()`: optimization
/// might be possible.
fn is_exact(&self) -> bool;
}
/// Matches everything.
///```
/// use hg::{ matchers::{Matcher, AlwaysMatcher}, utils::hg_path::HgPath };
///
/// let matcher = AlwaysMatcher;
///
/// assert_eq!(matcher.matches(HgPath::new(b"whatever")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), true);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct AlwaysMatcher;
impl Matcher for AlwaysMatcher {
fn file_set(&self) -> Option<&HashSet<&HgPath>> {
None
}
fn exact_match(&self, _filename: impl AsRef<HgPath>) -> bool {
false
}
fn matches(&self, _filename: impl AsRef<HgPath>) -> bool {
true
}
fn visit_children_set(
&self,
_directory: impl AsRef<HgPath>,
) -> VisitChildrenSet {
VisitChildrenSet::Recursive
}
fn matches_everything(&self) -> bool {
true
}
fn is_exact(&self) -> bool {
false
}
}
/// Matches the input files exactly. They are interpreted as paths, not
/// patterns.
///
///```
/// use hg::{ matchers::{Matcher, FileMatcher}, utils::hg_path::HgPath };
///
/// let files = [HgPath::new(b"a.txt"), HgPath::new(br"re:.*\.c$")];
/// let matcher = FileMatcher::new(&files).unwrap();
///
/// assert_eq!(matcher.matches(HgPath::new(b"a.txt")), true);
/// assert_eq!(matcher.matches(HgPath::new(b"b.txt")), false);
/// assert_eq!(matcher.matches(HgPath::new(b"main.c")), false);
/// assert_eq!(matcher.matches(HgPath::new(br"re:.*\.c$")), true);
/// ```
#[derive(Debug)]
pub struct FileMatcher<'a> {
files: HashSet<&'a HgPath>,
dirs: DirsMultiset,
}
impl<'a> FileMatcher<'a> {
pub fn new(
files: &'a [impl AsRef<HgPath>],
) -> Result<Self, DirstateMapError> {
Ok(Self {
files: HashSet::from_iter(files.iter().map(|f| f.as_ref())),
dirs: DirsMultiset::from_manifest(files)?,
})
}
fn inner_matches(&self, filename: impl AsRef<HgPath>) -> bool {
self.files.contains(filename.as_ref())
}
}
impl<'a> Matcher for FileMatcher<'a> {
fn file_set(&self) -> Option<&HashSet<&HgPath>> {
Some(&self.files)
}
fn exact_match(&self, filename: impl AsRef<HgPath>) -> bool {
self.inner_matches(filename)
}
fn matches(&self, filename: impl AsRef<HgPath>) -> bool {
self.inner_matches(filename)
}
fn visit_children_set(
&self,
directory: impl AsRef<HgPath>,
) -> VisitChildrenSet {
if self.files.is_empty() || !self.dirs.contains(&directory) {
return VisitChildrenSet::Empty;
}
let dirs_as_set = self.dirs.iter().map(|k| k.deref()).collect();
let mut candidates: HashSet<&HgPath> =
self.files.union(&dirs_as_set).map(|k| *k).collect();
candidates.remove(HgPath::new(b""));
if !directory.as_ref().is_empty() {
let directory = [directory.as_ref().as_bytes(), b"/"].concat();
candidates = candidates
.iter()
.filter_map(|c| {
if c.as_bytes().starts_with(&directory) {
Some(HgPath::new(&c.as_bytes()[directory.len()..]))
} else {
None
}
})
.collect();
}
// `self.dirs` includes all of the directories, recursively, so if
// we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',
// 'foo/bar' in it. Thus we can safely ignore a candidate that has a
// '/' in it, indicating it's for a subdir-of-a-subdir; the immediate
// subdir will be in there without a slash.
VisitChildrenSet::Set(
candidates
.iter()
.filter_map(|c| {
if c.bytes().all(|b| *b != b'/') {
Some(*c)
} else {
None
}
})
.collect(),
)
}
fn matches_everything(&self) -> bool {
false
}
fn is_exact(&self) -> bool {
true
}
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
#[test]
fn test_filematcher_visit_children_set() {
// Visitchildrenset
let files = vec![HgPath::new(b"dir/subdir/foo.txt")];
let matcher = FileMatcher::new(&files).unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"dir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"subdir"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"foo.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/x")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"dir/subdir/foo.txt")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
}
#[test]
fn test_filematcher_visit_children_set_files_and_dirs() {
let files = vec![
HgPath::new(b"rootfile.txt"),
HgPath::new(b"a/file1.txt"),
HgPath::new(b"a/b/file2.txt"),
// No file in a/b/c
HgPath::new(b"a/b/c/d/file4.txt"),
];
let matcher = FileMatcher::new(&files).unwrap();
let mut set = HashSet::new();
set.insert(HgPath::new(b"a"));
set.insert(HgPath::new(b"rootfile.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"b"));
set.insert(HgPath::new(b"file1.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"c"));
set.insert(HgPath::new(b"file2.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"d"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c")),
VisitChildrenSet::Set(set)
);
let mut set = HashSet::new();
set.insert(HgPath::new(b"file4.txt"));
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c/d")),
VisitChildrenSet::Set(set)
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"a/b/c/d/e")),
VisitChildrenSet::Empty
);
assert_eq!(
matcher.visit_children_set(HgPath::new(b"folder")),
VisitChildrenSet::Empty
);
}
}