dirstate-v2: Drop cached read_dir results after .hgignore changes
Soon we’ll want the status algorithm to be able to skip `std::fs::read_dir` in
more cases, notably when listing unknown files but not ignored files.
When ignore patterns change (which we detect by their hash, added to the
dirstate-v2 format in a previous changeset), a formerly-ignored file could
become unknown without changing its parent directory’s modification time.
Therefore we remove any directory mtime from the dirstate, effectively
invalidating the existing caches.
Differential Revision: https://phab.mercurial-scm.org/D10907
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Jun 22 22:52:08 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Jun 04 15:26:38 2021 +0200
@@ -495,6 +495,19 @@
}
}
+ pub(super) fn get_or_insert<'tree, 'path>( // Convenience wrapper around `get_or_insert_node`. NOTE(review): `'path` is declared but unused in this signature.
+ &'tree mut self,
+ path: &HgPath, // Path of the node to look up (presumably created when absent, going by the name; confirm in `get_or_insert_node`).
+ ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { // Mutable node reference on success, parse error otherwise.
+ Self::get_or_insert_node( // Delegates, supplying this map's own `on_disk` and `root` fields.
+ self.on_disk,
+ &mut self.root,
+ path,
+ WithBasename::to_cow_owned,
+ |_| {}, // No-op callback: this wrapper does nothing extra with whatever the callee passes in.
+ )
+ }
+
pub(super) fn get_or_insert_node<'tree, 'path>(
on_disk: &'on_disk [u8],
root: &'tree mut ChildNodes<'on_disk>,
--- a/rust/hg-core/src/dirstate_tree/status.rs Tue Jun 22 22:52:08 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/status.rs Fri Jun 04 15:26:38 2021 +0200
@@ -6,7 +6,6 @@
use crate::dirstate_tree::dirstate_map::NodeRef;
use crate::dirstate_tree::on_disk::DirstateV2ParseError;
use crate::dirstate_tree::on_disk::Timestamp;
-use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::matchers::get_ignore_function;
use crate::matchers::Matcher;
use crate::utils::files::get_bytes_from_os_string;
@@ -70,6 +69,7 @@
outcome: Default::default(),
ignore_patterns_have_changed: patterns_changed,
new_cachable_directories: Default::default(),
+ outated_cached_directories: Default::default(),
filesystem_time_at_status_start: filesystem_now(&root_dir).ok(),
};
let is_at_repo_root = true;
@@ -91,18 +91,22 @@
)?;
let mut outcome = common.outcome.into_inner().unwrap();
let new_cachable = common.new_cachable_directories.into_inner().unwrap();
+ let outdated = common.outated_cached_directories.into_inner().unwrap();
outcome.dirty = common.ignore_patterns_have_changed == Some(true)
+ || !outdated.is_empty()
|| !new_cachable.is_empty();
+ // Remove outdated mtimes before adding new mtimes, in case a given
+ // directory is both
+ for path in &outdated {
+ let node = dmap.get_or_insert(path)?;
+ if let NodeData::CachedDirectory { .. } = &node.data {
+ node.data = NodeData::None
+ }
+ }
for (path, mtime) in &new_cachable {
- let node = DirstateMap::get_or_insert_node(
- dmap.on_disk,
- &mut dmap.root,
- path,
- WithBasename::to_cow_owned,
- |_| {},
- )?;
+ let node = dmap.get_or_insert(path)?;
match &node.data {
NodeData::Entry(_) => {} // Don’t overwrite an entry
NodeData::CachedDirectory { .. } | NodeData::None => {
@@ -123,6 +127,7 @@
ignore_fn: IgnoreFnType<'a>,
outcome: Mutex<DirstateStatus<'on_disk>>,
new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
+ outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
/// Whether ignore files like `.hgignore` have changed since the previous
/// time a `status()` call wrote their hash to the dirstate. `None` means
@@ -155,6 +160,22 @@
.push((hg_path.to_owned().into(), BadMatch::OsError(errno)))
}
+ fn check_for_outdated_directory_cache( // Records the node's path as an outdated cached directory when both conditions below hold.
+ &self,
+ dirstate_node: &NodeRef<'tree, 'on_disk>, // Node to inspect; only read here, never modified.
+ ) -> Result<(), DirstateV2ParseError> { // The only error source visible here is the `?` on the path lookup below.
+ if self.ignore_patterns_have_changed == Some(true) // Only a definite `Some(true)` qualifies; `None` and `Some(false)` do not.
+ && dirstate_node.cached_directory_mtime().is_some() // ...and this node currently carries a cached directory mtime.
+ {
+ self.outated_cached_directories.lock().unwrap().push( // `unwrap` panics if the mutex was poisoned.
+ dirstate_node
+ .full_path_borrowed(self.dmap.on_disk)? // On error, returns early without pushing anything.
+ .detach_from_tree(), // Presumably decouples the path from the `'tree` borrow so it can be stored; confirm.
+ )
+ }
+ Ok(())
+ }
+
/// If this returns true, we can get accurate results by only using
/// `symlink_metadata` for child nodes that exist in the dirstate and don’t
/// need to call `read_dir`.
@@ -304,6 +325,7 @@
dirstate_node: NodeRef<'tree, 'on_disk>,
has_ignored_ancestor: bool,
) -> Result<(), DirstateV2ParseError> {
+ self.check_for_outdated_directory_cache(&dirstate_node)?;
let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?;
let file_type = fs_metadata.file_type();
let file_or_symlink = file_type.is_file() || file_type.is_symlink();
@@ -521,6 +543,7 @@
&self,
dirstate_node: NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
+ self.check_for_outdated_directory_cache(&dirstate_node)?;
self.mark_removed_or_deleted_if_file(
&dirstate_node.full_path_borrowed(self.dmap.on_disk)?,
dirstate_node.state()?,