Mercurial > hg
changeset 43602:889ac87e8bfd
rust-status: improve status performance
This change does more things in the parallel loop, refactors the file-level
logic into two functions for added clarity.
This bit of Rust code takes 55ms to execute on a repo where the stat'ing part
of Valentin's fast path takes 40ms. While the code differs a bit and it's hard
to get an exact measurement of how much of a performance impact it has, I can
be fairly certain that this implementation is *at worse* twice as slow.
Differential Revision: https://phab.mercurial-scm.org/D7254
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 06 Nov 2019 13:43:18 +0100 |
parents | a80d5ddecc2d |
children | 75fe6e71ddb8 |
files | rust/hg-core/src/dirstate/status.rs |
diffstat | 1 files changed, 139 insertions(+), 102 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs Sat Nov 09 12:55:56 2019 +0900 +++ b/rust/hg-core/src/dirstate/status.rs Wed Nov 06 13:43:18 2019 +0100 @@ -10,22 +10,120 @@ //! and will only be triggered in narrow cases. use crate::utils::files::HgMetadata; -use crate::utils::hg_path::{hg_path_to_path_buf, HgPathBuf}; -use crate::{DirstateEntry, DirstateMap, EntryState}; +use crate::utils::hg_path::{hg_path_to_path_buf, HgPath}; +use crate::{CopyMap, DirstateEntry, DirstateMap, EntryState}; use rayon::prelude::*; use std::path::Path; -// Stat all entries in the `DirstateMap` and return their new metadata. -pub fn stat_dmap_entries( +/// Marker enum used to dispatch new status entries into the right collections. +/// Is similar to `crate::EntryState`, but represents the transient state of +/// entries during the lifetime of a command. +enum Dispatch { + Unsure, + Modified, + Added, + Removed, + Deleted, + Clean, + Unknown, +} + +/// The file corresponding to the dirstate entry was found on the filesystem. +fn dispatch_found( + filename: impl AsRef<HgPath>, + entry: DirstateEntry, + metadata: HgMetadata, + copy_map: &CopyMap, + check_exec: bool, + list_clean: bool, + last_normal_time: i64, +) -> Dispatch { + let DirstateEntry { + state, + mode, + mtime, + size, + } = entry; + + let HgMetadata { + st_mode, + st_size, + st_mtime, + .. + } = metadata; + + match state { + EntryState::Normal => { + // Dates and times that are outside the 31-bit signed + // range are compared modulo 2^31. This should prevent + // it from behaving badly with very large files or + // corrupt dates while still having a high probability + // of detecting changes. (issue2608) + let range_mask = 0x7fffffff; + + let size_changed = (size != st_size as i32) + && size != (st_size as i32 & range_mask); + let mode_changed = + (mode ^ st_mode as i32) & 0o100 != 0o000 && check_exec; + if size >= 0 + && (size_changed || mode_changed) + || size == -2 // other parent + || copy_map.contains_key(filename.as_ref()) + { + Dispatch::Modified + } else if mtime != st_mtime as i32 + && mtime != (st_mtime as i32 & range_mask) + { + Dispatch::Unsure + } else if st_mtime == last_normal_time { + // the file may have just been marked as normal and + // it may have changed in the same second without + // changing its size. This can happen if we quickly + // do multiple commits. Force lookup, so we don't + // miss such a racy file change. + Dispatch::Unsure + } else if list_clean { + Dispatch::Clean + } else { + Dispatch::Unknown + } + } + EntryState::Merged => Dispatch::Modified, + EntryState::Added => Dispatch::Added, + EntryState::Removed => Dispatch::Removed, + EntryState::Unknown => Dispatch::Unknown, + } +} + +/// The file corresponding to this Dirstate entry is missing. +fn dispatch_missing(state: EntryState) -> Dispatch { + match state { + // File was removed from the filesystem during commands + EntryState::Normal | EntryState::Merged | EntryState::Added => { + Dispatch::Deleted + } + // File was removed, everything is normal + EntryState::Removed => Dispatch::Removed, + // File is unknown to Mercurial, everything is normal + EntryState::Unknown => Dispatch::Unknown, + } +} + +/// Stat all entries in the `DirstateMap` and mark them for dispatch into +/// the relevant collections. +fn stat_dmap_entries( dmap: &DirstateMap, root_dir: impl AsRef<Path> + Sync, -) -> std::io::Result<Vec<(HgPathBuf, Option<HgMetadata>)>> { + check_exec: bool, + list_clean: bool, + last_normal_time: i64, +) -> std::io::Result<Vec<(&HgPath, Dispatch)>> { dmap.par_iter() .filter_map( // Getting file metadata is costly, so we don't do it if the // file is already present in the results, hence `filter_map` - |(filename, _)| -> Option< - std::io::Result<(HgPathBuf, Option<HgMetadata>)> + |(filename, entry)| -> Option< + std::io::Result<(&HgPath, Dispatch)> > { let meta = match hg_path_to_path_buf(filename) { Ok(p) => root_dir.as_ref().join(p).symlink_metadata(), @@ -37,12 +135,16 @@ if !(m.file_type().is_file() || m.file_type().is_symlink()) => { - Ok((filename.to_owned(), None)) + Ok((filename, dispatch_missing(entry.state))) } - Ok(m) => Ok(( - filename.to_owned(), - Some(HgMetadata::from_metadata(m)), - )), + Ok(m) => Ok((filename, dispatch_found( + filename, + *entry, + HgMetadata::from_metadata(m), + &dmap.copy_map, + check_exec, + list_clean, + last_normal_time))), Err(ref e) if e.kind() == std::io::ErrorKind::NotFound || e.raw_os_error() == Some(20) => @@ -51,7 +153,7 @@ // `NotADirectory` (errno 20) // It happens if the dirstate contains `foo/bar` and // foo is not a directory - Ok((filename.to_owned(), None)) + Ok((filename, dispatch_missing(entry.state))) } Err(e) => Err(e), }) @@ -60,23 +162,19 @@ .collect() } -pub struct StatusResult { - pub modified: Vec<HgPathBuf>, - pub added: Vec<HgPathBuf>, - pub removed: Vec<HgPathBuf>, - pub deleted: Vec<HgPathBuf>, - pub clean: Vec<HgPathBuf>, +pub struct StatusResult<'a> { + pub modified: Vec<&'a HgPath>, + pub added: Vec<&'a HgPath>, + pub removed: Vec<&'a HgPath>, + pub deleted: Vec<&'a HgPath>, + pub clean: Vec<&'a HgPath>, // TODO ignored // TODO unknown } fn build_response( - dmap: &DirstateMap, - list_clean: bool, - last_normal_time: i64, - check_exec: bool, - results: Vec<(HgPathBuf, Option<HgMetadata>)>, -) -> (Vec<HgPathBuf>, StatusResult) { + results: Vec<(&HgPath, Dispatch)>, +) -> (Vec<&HgPath>, StatusResult) { let mut lookup = vec![]; let mut modified = vec![]; let mut added = vec![]; @@ -84,76 +182,15 @@ let mut deleted = vec![]; let mut clean = vec![]; - for (filename, metadata_option) in results.into_iter() { - let DirstateEntry { - state, - mode, - mtime, - size, - } = match dmap.get(&filename) { - None => { - continue; - } - Some(e) => *e, - }; - - match metadata_option { - None => { - match state { - EntryState::Normal - | EntryState::Merged - | EntryState::Added => deleted.push(filename), - EntryState::Removed => removed.push(filename), - _ => {} - }; - } - Some(HgMetadata { - st_mode, - st_size, - st_mtime, - .. - }) => { - match state { - EntryState::Normal => { - // Dates and times that are outside the 31-bit signed - // range are compared modulo 2^31. This should prevent - // it from behaving badly with very large files or - // corrupt dates while still having a high probability - // of detecting changes. (issue2608) - let range_mask = 0x7fffffff; - - let size_changed = (size != st_size as i32) - && size != (st_size as i32 & range_mask); - let mode_changed = (mode ^ st_mode as i32) & 0o100 - != 0o000 - && check_exec; - if size >= 0 - && (size_changed || mode_changed) - || size == -2 // other parent - || dmap.copy_map.contains_key(&filename) - { - modified.push(filename); - } else if mtime != st_mtime as i32 - && mtime != (st_mtime as i32 & range_mask) - { - lookup.push(filename); - } else if st_mtime == last_normal_time { - // the file may have just been marked as normal and - // it may have changed in the same second without - // changing its size. This can happen if we quickly - // do multiple commits. Force lookup, so we don't - // miss such a racy file change. - lookup.push(filename); - } else if list_clean { - clean.push(filename); - } - } - EntryState::Merged => modified.push(filename), - EntryState::Added => added.push(filename), - EntryState::Removed => removed.push(filename), - EntryState::Unknown => {} - } - } + for (filename, dispatch) in results.into_iter() { + match dispatch { + Dispatch::Unknown => {} + Dispatch::Unsure => lookup.push(filename), + Dispatch::Modified => modified.push(filename), + Dispatch::Added => added.push(filename), + Dispatch::Removed => removed.push(filename), + Dispatch::Deleted => deleted.push(filename), + Dispatch::Clean => clean.push(filename), } } @@ -175,14 +212,14 @@ list_clean: bool, last_normal_time: i64, check_exec: bool, -) -> std::io::Result<(Vec<HgPathBuf>, StatusResult)> { - let results = stat_dmap_entries(&dmap, root_dir)?; - - Ok(build_response( +) -> std::io::Result<(Vec<&HgPath>, StatusResult)> { + let results = stat_dmap_entries( &dmap, + root_dir, + check_exec, list_clean, last_normal_time, - check_exec, - results, - )) + )?; + + Ok(build_response(results)) }