Mercurial > hg
changeset 43915:8c77826116f7
rust-dirstate-status: add `walk_explicit` implementation, use `Matcher` trait
This is the first time we actually use the `Matcher` trait, although still only
for a small subset of all the matchers defined in Python.
While I haven't yet actually measured the performance of this, I have tried
to avoid any unnecessary allocations. This forces the use of heavy lifetime
annotations, which I am not sure we can simplify, although I would be happy
to be proven wrong.
Differential Revision: https://phab.mercurial-scm.org/D7529
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Fri, 29 Nov 2019 17:29:06 +0100 |
parents | 69c4f3cf2cdf |
children | 6a88ced33c40 |
files | rust/hg-core/src/dirstate/status.rs rust/hg-cpython/src/dirstate/status.rs |
diffstat | 2 files changed, 111 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs Fri Nov 29 18:54:06 2019 +0100 +++ b/rust/hg-core/src/dirstate/status.rs Fri Nov 29 17:29:06 2019 +0100 @@ -11,6 +11,7 @@ use crate::{ dirstate::SIZE_FROM_OTHER_PARENT, + matchers::Matcher, utils::{ files::HgMetadata, hg_path::{hg_path_to_path_buf, HgPath}, @@ -18,6 +19,7 @@ CopyMap, DirstateEntry, DirstateMap, EntryState, }; use rayon::prelude::*; +use std::collections::HashSet; use std::path::Path; /// Marker enum used to dispatch new status entries into the right collections. @@ -33,6 +35,8 @@ Unknown, } +type IoResult<T> = std::io::Result<T>; + /// Dates and times that are outside the 31-bit signed range are compared /// modulo 2^31. This should prevent hg from behaving badly with very large /// files or corrupt dates while still having a high probability of detecting @@ -116,6 +120,63 @@ } } +/// Get stat data about the files explicitly specified by match. +/// TODO subrepos +fn walk_explicit<'a>( + files: &'a HashSet<&HgPath>, + dmap: &'a DirstateMap, + root_dir: impl AsRef<Path> + Sync + Send, + check_exec: bool, + list_clean: bool, + last_normal_time: i64, +) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> { + files.par_iter().filter_map(move |filename| { + // TODO normalization + let normalized = filename.as_ref(); + + let buf = match hg_path_to_path_buf(normalized) { + Ok(x) => x, + Err(e) => return Some(Err(e.into())), + }; + let target = root_dir.as_ref().join(buf); + let st = target.symlink_metadata(); + match st { + Ok(meta) => { + let file_type = meta.file_type(); + if file_type.is_file() || file_type.is_symlink() { + if let Some(entry) = dmap.get(normalized) { + return Some(Ok(( + normalized, + dispatch_found( + &normalized, + *entry, + HgMetadata::from_metadata(meta), + &dmap.copy_map, + check_exec, + list_clean, + last_normal_time, + ), + ))); + } + } else { + if dmap.contains_key(normalized) { + return Some(Ok((normalized, Dispatch::Removed))); + } + } + } + Err(_) => { + if let 
Some(entry) = dmap.get(normalized) { + return Some(Ok(( + normalized, + dispatch_missing(entry.state), + ))); + } + } + }; + None + }) +} + /// Stat all entries in the `DirstateMap` and mark them for dispatch into /// the relevant collections. fn stat_dmap_entries( @@ -124,7 +185,7 @@ check_exec: bool, list_clean: bool, last_normal_time: i64, -) -> impl ParallelIterator<Item = std::io::Result<(&HgPath, Dispatch)>> { +) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> { dmap.par_iter().map(move |(filename, entry)| { let filename: &HgPath = filename; let filename_as_path = hg_path_to_path_buf(filename)?; @@ -174,9 +235,9 @@ * TODO unknown */ } -fn build_response( - results: Vec<(&HgPath, Dispatch)>, -) -> (Vec<&HgPath>, StatusResult) { +fn build_response<'a>( + results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>, +) -> IoResult<(Vec<&'a HgPath>, StatusResult<'a>)> { let mut lookup = vec![]; let mut modified = vec![]; let mut added = vec![]; @@ -184,7 +245,8 @@ let mut deleted = vec![]; let mut clean = vec![]; - for (filename, dispatch) in results.into_iter() { + for res in results.into_iter() { + let (filename, dispatch) = res?; match dispatch { Dispatch::Unknown => {} Dispatch::Unsure => lookup.push(filename), @@ -196,7 +258,7 @@ } } - ( + Ok(( lookup, StatusResult { modified, @@ -205,24 +267,40 @@ deleted, clean, }, - ) + )) } -pub fn status( - dmap: &DirstateMap, +pub fn status<'a: 'c, 'b: 'c, 'c>( + dmap: &'a DirstateMap, + matcher: &'b (impl Matcher), root_dir: impl AsRef<Path> + Sync + Send + Copy, list_clean: bool, last_normal_time: i64, check_exec: bool, -) -> std::io::Result<(Vec<&HgPath>, StatusResult)> { - let results: std::io::Result<_> = stat_dmap_entries( - &dmap, - root_dir, - check_exec, - list_clean, - last_normal_time, - ) - .collect(); +) -> IoResult<(Vec<&'c HgPath>, StatusResult<'c>)> { + let files = matcher.file_set(); + let mut results = vec![]; + if let Some(files) = files { + results.par_extend(walk_explicit( + 
&files, + &dmap, + root_dir, + check_exec, + list_clean, + last_normal_time, + )); + } - Ok(build_response(results?)) + if !matcher.is_exact() { + let stat_results = stat_dmap_entries( + &dmap, + root_dir, + check_exec, + list_clean, + last_normal_time, + ); + results.par_extend(stat_results); + } + + build_response(results) }
--- a/rust/hg-cpython/src/dirstate/status.rs Fri Nov 29 18:54:06 2019 +0100 +++ b/rust/hg-cpython/src/dirstate/status.rs Fri Nov 29 17:29:06 2019 +0100 @@ -17,6 +17,7 @@ }; use hg::utils::files::get_path_from_bytes; +use hg::matchers::AlwaysMatcher; use hg::status; use hg::utils::hg_path::HgPath; @@ -53,9 +54,19 @@ let dmap: DirstateMap = dmap.to_py_object(py); let dmap = dmap.get_inner(py); - let (lookup, status_res) = - status(&dmap, &root_dir, list_clean, last_normal_time, check_exec) - .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; + // TODO removed in the next patch to get the code to compile. This patch + // is part of a series and does not make real sense on its own. + let matcher = AlwaysMatcher; + + let (lookup, status_res) = status( + &dmap, + &matcher, + &root_dir, + list_clean, + last_normal_time, + check_exec, + ) + .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; let modified = collect_pybytes_list(py, status_res.modified.as_ref()); let added = collect_pybytes_list(py, status_res.added.as_ref());