Mercurial > hg
changeset 47338:f27f2afb15da
dirstate-tree: Skip readdir() in `hg status -mard`
When running the status algorithm in a mode where we don’t list unknown
or ignored files, all we care about are files that are listed in the dirstate.
We can therefore skip making expensive calls to readdir() to list the contents
of filesystem directories, and instead only run stat() to get the filesystem
state of files listed in the dirstate. (This state may be an error for files
that don’t exist anymore on the filesystem.)
On 16 CPU threads, this reduces the time spent in the `status()` function for
`hg status -mard` on an old snapshot of mozilla-central from ~70ms to ~50ms.
Differential Revision: https://phab.mercurial-scm.org/D10752
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 19 May 2021 16:18:16 +0200 |
parents | 0654b3b3d2b5 |
children | 43f6a7bb4e12 |
files | rust/hg-core/src/dirstate_tree/status.rs |
diffstat | 1 files changed, 55 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/status.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/status.rs Wed May 19 16:18:16 2021 +0200 @@ -6,6 +6,7 @@ use crate::matchers::get_ignore_function; use crate::matchers::Matcher; use crate::utils::files::get_bytes_from_os_string; +use crate::utils::files::get_path_from_bytes; use crate::utils::hg_path::HgPath; use crate::BadMatch; use crate::DirstateStatus; @@ -83,14 +84,17 @@ fs_path: &Path, is_at_repo_root: bool, ) -> Result<Vec<DirEntry>, ()> { - DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { - let errno = error.raw_os_error().expect("expected real OS error"); - self.outcome - .lock() - .unwrap() - .bad - .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) - }) + DirEntry::read_dir(fs_path, is_at_repo_root) + .map_err(|error| self.io_error(error, hg_path)) + } + + fn io_error(&self, error: std::io::Error, hg_path: &HgPath) { + let errno = error.raw_os_error().expect("expected real OS error"); + self.outcome + .lock() + .unwrap() + .bad + .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) } fn traverse_fs_directory_and_dirstate( @@ -101,6 +105,35 @@ directory_fs_path: &Path, is_at_repo_root: bool, ) -> Result<(), DirstateV2ParseError> { + if !self.options.list_unknown && !self.options.list_ignored { + // We only care about files in the dirstate, so we can skip listing + // filesystem directories entirely. 
+ return dirstate_nodes + .par_iter() + .map(|dirstate_node| { + let fs_path = directory_fs_path.join(get_path_from_bytes( + dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), + )); + match std::fs::symlink_metadata(&fs_path) { + Ok(fs_metadata) => self.traverse_fs_and_dirstate( + &fs_path, + &fs_metadata, + dirstate_node, + has_ignored_ancestor, + ), + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + self.traverse_dirstate_only(dirstate_node) + } + Err(error) => { + let hg_path = + dirstate_node.full_path(self.dmap.on_disk)?; + Ok(self.io_error(error, hg_path)) + } + } + }) + .collect(); + } + let mut fs_entries = if let Ok(entries) = self.read_dir( directory_hg_path, directory_fs_path, @@ -141,7 +174,8 @@ match pair { Both(dirstate_node, fs_entry) => self .traverse_fs_and_dirstate( - fs_entry, + &fs_entry.full_path, + &fs_entry.metadata, dirstate_node, has_ignored_ancestor, ), @@ -160,12 +194,13 @@ fn traverse_fs_and_dirstate( &self, - fs_entry: &DirEntry, + fs_path: &Path, + fs_metadata: &std::fs::Metadata, dirstate_node: NodeRef<'tree, '_>, has_ignored_ancestor: bool, ) -> Result<(), DirstateV2ParseError> { let hg_path = dirstate_node.full_path(self.dmap.on_disk)?; - let file_type = fs_entry.metadata.file_type(); + let file_type = fs_metadata.file_type(); let file_or_symlink = file_type.is_file() || file_type.is_symlink(); if !file_or_symlink { // If we previously had a file here, it was removed (with @@ -186,7 +221,7 @@ is_ignored, dirstate_node.children(self.dmap.on_disk)?, hg_path, - &fs_entry.full_path, + fs_path, is_at_repo_root, )? } else { @@ -209,9 +244,8 @@ .unwrap() .modified .push(full_path), - EntryState::Normal => { - self.handle_normal_file(&dirstate_node, fs_entry)? 
- } + EntryState::Normal => self + .handle_normal_file(&dirstate_node, fs_metadata)?, // This variant is not used in DirstateMap // nodes EntryState::Unknown => unreachable!(), @@ -239,7 +273,7 @@ fn handle_normal_file( &self, dirstate_node: &NodeRef<'tree, '_>, - fs_entry: &DirEntry, + fs_metadata: &std::fs::Metadata, ) -> Result<(), DirstateV2ParseError> { // Keep the low 31 bits fn truncate_u64(value: u64) -> i32 { @@ -253,13 +287,12 @@ .entry()? .expect("handle_normal_file called with entry-less node"); let full_path = Cow::from(dirstate_node.full_path(self.dmap.on_disk)?); - let mode_changed = || { - self.options.check_exec && entry.mode_changed(&fs_entry.metadata) - }; - let size_changed = entry.size != truncate_u64(fs_entry.metadata.len()); + let mode_changed = + || self.options.check_exec && entry.mode_changed(fs_metadata); + let size_changed = entry.size != truncate_u64(fs_metadata.len()); if entry.size >= 0 && size_changed - && fs_entry.metadata.file_type().is_symlink() + && fs_metadata.file_type().is_symlink() { // issue6456: Size returned may be longer due to encryption // on EXT-4 fscrypt. TODO maybe only do it on EXT4? @@ -270,7 +303,7 @@ { self.outcome.lock().unwrap().modified.push(full_path) } else { - let mtime = mtime_seconds(&fs_entry.metadata); + let mtime = mtime_seconds(fs_metadata); if truncate_i64(mtime) != entry.mtime || mtime == self.options.last_normal_time {