rust-status: only visit parts of the tree requested by the matcher
This is an optimization that the matcher is designed to support, but
we weren't doing it until now. This is primarily relevant for
supporting "hg status [FILES]", where this optimization is crucial for
getting good performance (without this optimization, that command will
still scan the entire tree, and just filter it down after the fact).
When this optimization fires we have to return false from
traverse_fs_directory_and_dirstate, signalling that this part of the
tree *might* have new files which we didn't see because we skipped
parts of it. This only affects the cached result of the status, and
is necessary to make future status operations (which might use a
different matcher) work properly.
--- a/rust/hg-core/src/dirstate_tree/status.rs Wed Aug 02 10:21:18 2023 -0400
+++ b/rust/hg-core/src/dirstate_tree/status.rs Wed Aug 02 10:33:11 2023 -0400
@@ -8,7 +8,7 @@
use crate::dirstate_tree::dirstate_map::NodeRef;
use crate::dirstate_tree::on_disk::DirstateV2ParseError;
use crate::matchers::get_ignore_function;
-use crate::matchers::Matcher;
+use crate::matchers::{Matcher, VisitChildrenSet};
use crate::utils::files::get_bytes_from_os_string;
use crate::utils::files::get_bytes_from_path;
use crate::utils::files::get_path_from_bytes;
@@ -382,6 +382,16 @@
false
}
+ fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool {
+ match set {
+ VisitChildrenSet::This | VisitChildrenSet::Recursive => true,
+ VisitChildrenSet::Empty => false,
+ VisitChildrenSet::Set(children_to_visit) => {
+ children_to_visit.contains(basename)
+ }
+ }
+ }
+
/// Returns whether all child entries of the filesystem directory have a
/// corresponding dirstate node or are ignored.
fn traverse_fs_directory_and_dirstate<'ancestor>(
@@ -393,14 +403,24 @@
cached_directory_mtime: Option<TruncatedTimestamp>,
is_at_repo_root: bool,
) -> Result<bool, DirstateV2ParseError> {
+ let children_set = self.matcher.visit_children_set(directory_hg_path);
+ if let VisitChildrenSet::Empty = children_set {
+ return Ok(false);
+ }
if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) {
dirstate_nodes
.par_iter()
.map(|dirstate_node| {
let fs_path = &directory_entry.fs_path;
- let fs_path = fs_path.join(get_path_from_bytes(
- dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
- ));
+ let basename =
+ dirstate_node.base_name(self.dmap.on_disk)?.as_bytes();
+ let fs_path = fs_path.join(get_path_from_bytes(basename));
+ if !Self::should_visit(
+ &children_set,
+ HgPath::new(basename),
+ ) {
+ return Ok(());
+ }
match std::fs::symlink_metadata(&fs_path) {
Ok(fs_metadata) => {
let file_type = fs_metadata.file_type().into();
@@ -483,6 +503,15 @@
.par_bridge()
.map(|pair| {
use itertools::EitherOrBoth::*;
+ let basename = match &pair {
+ Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new(
+ dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(),
+ ),
+ Right(fs_entry) => &fs_entry.hg_path,
+ };
+ if !Self::should_visit(&children_set, basename) {
+ return Ok(false);
+ }
let has_dirstate_node_or_is_ignored = match pair {
Both(dirstate_node, fs_entry) => {
self.traverse_fs_and_dirstate(