# HG changeset patch # User Arseniy Alekseyev # Date 1665409719 -3600 # Node ID eb02decdf0abd2f0c9252e299546c44f6b7ca3e9 # Parent 943509a58d29abf63ceb37b2315202432e32f2e5 dirstate-v2: skip evaluation of hgignore regex on cached directories By making the computation of [has_ignored_ancestor] lazy we're eliding its computation in the common case when none of its descendants have changed on disk. On a ~400k files repo, with a cached status, we saw a ~64% reduction in CPU time, resulting in a speedup of ~10-15% (on ZFS), and a speedup of ~38% of XFS (XFS has faster stat operations for some reason). diff -r 943509a58d29 -r eb02decdf0ab rust/Cargo.lock --- a/rust/Cargo.lock Fri Sep 30 09:05:48 2022 -0600 +++ b/rust/Cargo.lock Mon Oct 10 14:48:39 2022 +0100 @@ -468,6 +468,7 @@ "log", "memmap2", "micro-timer", + "once_cell", "ouroboros", "pretty_assertions", "rand 0.8.5", @@ -687,6 +688,12 @@ ] [[package]] +name = "once_cell" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f7254b99e31cad77da24b08ebf628882739a608578bb1bcdfc1f9c21260d7c0" + +[[package]] name = "opaque-debug" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" diff -r 943509a58d29 -r eb02decdf0ab rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml Fri Sep 30 09:05:48 2022 -0600 +++ b/rust/hg-core/Cargo.toml Mon Oct 10 14:48:39 2022 +0100 @@ -35,6 +35,9 @@ memmap2 = { version = "0.5.3", features = ["stable_deref_trait"] } zstd = "0.5.3" format-bytes = "0.3.0" +# once_cell 1.15 uses edition 2021, while the heptapod CI +# uses an old version of Cargo that doesn't support it. +once_cell = "=1.14.0" # We don't use the `miniz-oxide` backend to not change rhg benchmarks and until # we have a clearer view of which backend is the fastest. diff -r 943509a58d29 -r eb02decdf0ab rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs Fri Sep 30 09:05:48 2022 -0600 +++ b/rust/hg-core/src/dirstate_tree/status.rs Mon Oct 10 14:48:39 2022 +0100 @@ -20,6 +20,7 @@ use crate::StatusError; use crate::StatusOptions; use micro_timer::timed; +use once_cell::sync::OnceCell; use rayon::prelude::*; use sha1::{Digest, Sha1}; use std::borrow::Cow; @@ -126,14 +127,14 @@ }; let is_at_repo_root = true; let hg_path = &BorrowedPath::OnDisk(HgPath::new("")); - let has_ignored_ancestor = false; + let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path); let root_cached_mtime = None; let root_dir_metadata = None; // If the path we have for the repository root is a symlink, do follow it. // (As opposed to symlinks within the working directory which are not // followed, using `std::fs::symlink_metadata`.) common.traverse_fs_directory_and_dirstate( - has_ignored_ancestor, + &has_ignored_ancestor, dmap.root.as_ref(), hg_path, &root_dir, @@ -196,6 +197,40 @@ Unsure, } +/// Lazy computation of whether a given path has a hgignored +/// ancestor. +struct HasIgnoredAncestor<'a> { + /// `path` and `parent` constitute the inputs to the computation, + /// `cache` stores the outcome. + path: &'a HgPath, + parent: Option<&'a HasIgnoredAncestor<'a>>, + cache: OnceCell, +} + +impl<'a> HasIgnoredAncestor<'a> { + fn create( + parent: Option<&'a HasIgnoredAncestor<'a>>, + path: &'a HgPath, + ) -> HasIgnoredAncestor<'a> { + Self { + path, + parent, + cache: OnceCell::new(), + } + } + + fn force<'b>(&self, ignore_fn: &IgnoreFnType<'b>) -> bool { + match self.parent { + None => false, + Some(parent) => { + *(parent.cache.get_or_init(|| { + parent.force(ignore_fn) || ignore_fn(&self.path) + })) + } + } + } +} + impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { fn push_outcome( &self, @@ -318,9 +353,9 @@ /// Returns whether all child entries of the filesystem directory have a /// corresponding dirstate node or are ignored. - fn traverse_fs_directory_and_dirstate( + fn traverse_fs_directory_and_dirstate<'ancestor>( &self, - has_ignored_ancestor: bool, + has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>, dirstate_nodes: ChildNodesRef<'tree, 'on_disk>, directory_hg_path: &BorrowedPath<'tree, 'on_disk>, directory_fs_path: &Path, @@ -418,7 +453,7 @@ } Right(fs_entry) => { has_dirstate_node_or_is_ignored = self.traverse_fs_only( - has_ignored_ancestor, + has_ignored_ancestor.force(&self.ignore_fn), directory_hg_path, fs_entry, ) @@ -429,12 +464,12 @@ .try_reduce(|| true, |a, b| Ok(a && b)) } - fn traverse_fs_and_dirstate( + fn traverse_fs_and_dirstate<'ancestor>( &self, fs_path: &Path, fs_metadata: &std::fs::Metadata, dirstate_node: NodeRef<'tree, 'on_disk>, - has_ignored_ancestor: bool, + has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>, ) -> Result<(), DirstateV2ParseError> { self.check_for_outdated_directory_cache(&dirstate_node)?; let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?; @@ -454,11 +489,14 @@ .traversed .push(hg_path.detach_from_tree()) } - let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); + let is_ignored = HasIgnoredAncestor::create( + Some(&has_ignored_ancestor), + hg_path, + ); let is_at_repo_root = false; let children_all_have_dirstate_node_or_are_ignored = self .traverse_fs_directory_and_dirstate( - is_ignored, + &is_ignored, dirstate_node.children(self.dmap.on_disk)?, hg_path, fs_path, @@ -472,14 +510,14 @@ dirstate_node, )? } else { - if file_or_symlink && self.matcher.matches(hg_path) { + if file_or_symlink && self.matcher.matches(&hg_path) { if let Some(entry) = dirstate_node.entry()? { if !entry.any_tracked() { // Forward-compat if we start tracking unknown/ignored // files for caching reasons self.mark_unknown_or_ignored( - has_ignored_ancestor, - hg_path, + has_ignored_ancestor.force(&self.ignore_fn), + &hg_path, ); } if entry.added() { @@ -495,7 +533,7 @@ // `node.entry.is_none()` indicates a "directory" // node, but the filesystem has a file self.mark_unknown_or_ignored( - has_ignored_ancestor, + has_ignored_ancestor.force(&self.ignore_fn), hg_path, ); }