rust-dirstate-status: add `walk_explicit` implementation, use `Matcher` trait
author Raphaël Gomès <rgomes@octobus.net>
Fri, 29 Nov 2019 17:29:06 +0100
changeset 43915 8c77826116f7
parent 43914 69c4f3cf2cdf
child 43916 6a88ced33c40
rust-dirstate-status: add `walk_explicit` implementation, use `Matcher` trait This is the first time we actually use the `Matcher` trait, still for a small subset of all matchers defined in Python. While I haven't yet actually measured the performance of this, I have tried to avoid any unnecessary allocations. This forces the use of heavy lifetime annotations, which I am not sure we can simplify, although I would be happy to be proven wrong. Differential Revision: https://phab.mercurial-scm.org/D7529
rust/hg-core/src/dirstate/status.rs
rust/hg-cpython/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs	Fri Nov 29 18:54:06 2019 +0100
+++ b/rust/hg-core/src/dirstate/status.rs	Fri Nov 29 17:29:06 2019 +0100
@@ -11,6 +11,7 @@
 
 use crate::{
     dirstate::SIZE_FROM_OTHER_PARENT,
+    matchers::Matcher,
     utils::{
         files::HgMetadata,
         hg_path::{hg_path_to_path_buf, HgPath},
@@ -18,6 +19,7 @@
     CopyMap, DirstateEntry, DirstateMap, EntryState,
 };
 use rayon::prelude::*;
+use std::collections::HashSet;
 use std::path::Path;
 
 /// Marker enum used to dispatch new status entries into the right collections.
@@ -33,6 +35,8 @@
     Unknown,
 }
 
+type IoResult<T> = std::io::Result<T>;
+
 /// Dates and times that are outside the 31-bit signed range are compared
 /// modulo 2^31. This should prevent hg from behaving badly with very large
 /// files or corrupt dates while still having a high probability of detecting
@@ -116,6 +120,63 @@
     }
 }
 
+/// Stat files explicitly listed by the matcher; skip those not in `dmap`.
+/// TODO subrepos
+fn walk_explicit<'a>(
+    files: &'a HashSet<&HgPath>,
+    dmap: &'a DirstateMap,
+    root_dir: impl AsRef<Path> + Sync + Send,
+    check_exec: bool,
+    list_clean: bool,
+    last_normal_time: i64,
+) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
+    files.par_iter().filter_map(move |filename| {
+        // TODO normalization; for now the path is used unchanged
+        let normalized = filename.as_ref();
+
+        let buf = match hg_path_to_path_buf(normalized) {
+            Ok(x) => x,
+            Err(e) => return Some(Err(e.into())),
+        };
+        let target = root_dir.as_ref().join(buf);
+        let st = target.symlink_metadata();
+        match st {
+            Ok(meta) => {
+                let file_type = meta.file_type();
+                if file_type.is_file() || file_type.is_symlink() {
+                    if let Some(entry) = dmap.get(normalized) {
+                        return Some(Ok((
+                            normalized,
+                            dispatch_found(
+                                &normalized,
+                                *entry,
+                                HgMetadata::from_metadata(meta),
+                                &dmap.copy_map,
+                                check_exec,
+                                list_clean,
+                                last_normal_time,
+                            ),
+                        )));
+                    }
+                } else {
+                    if dmap.contains_key(normalized) {
+                        return Some(Ok((normalized, Dispatch::Removed)));
+                    }
+                }
+            }
+            Err(_) => {
+                if let Some(entry) = dmap.get(normalized) {
+                    return Some(Ok((
+                        normalized,
+                        dispatch_missing(entry.state),
+                    )));
+                }
+            }
+        };
+        None
+    })
+}
+
 /// Stat all entries in the `DirstateMap` and mark them for dispatch into
 /// the relevant collections.
 fn stat_dmap_entries(
@@ -124,7 +185,7 @@
     check_exec: bool,
     list_clean: bool,
     last_normal_time: i64,
-) -> impl ParallelIterator<Item = std::io::Result<(&HgPath, Dispatch)>> {
+) -> impl ParallelIterator<Item = IoResult<(&HgPath, Dispatch)>> {
     dmap.par_iter().map(move |(filename, entry)| {
         let filename: &HgPath = filename;
         let filename_as_path = hg_path_to_path_buf(filename)?;
@@ -174,9 +235,9 @@
      * TODO unknown */
 }
 
-fn build_response(
-    results: Vec<(&HgPath, Dispatch)>,
-) -> (Vec<&HgPath>, StatusResult) {
+fn build_response<'a>(
+    results: impl IntoIterator<Item = IoResult<(&'a HgPath, Dispatch)>>,
+) -> IoResult<(Vec<&'a HgPath>, StatusResult<'a>)> {
     let mut lookup = vec![];
     let mut modified = vec![];
     let mut added = vec![];
@@ -184,7 +245,8 @@
     let mut deleted = vec![];
     let mut clean = vec![];
 
-    for (filename, dispatch) in results.into_iter() {
+    for res in results.into_iter() {
+        let (filename, dispatch) = res?;
         match dispatch {
             Dispatch::Unknown => {}
             Dispatch::Unsure => lookup.push(filename),
@@ -196,7 +258,7 @@
         }
     }
 
-    (
+    Ok((
         lookup,
         StatusResult {
             modified,
@@ -205,24 +267,40 @@
             deleted,
             clean,
         },
-    )
+    ))
 }
 
-pub fn status(
-    dmap: &DirstateMap,
+pub fn status<'a: 'c, 'b: 'c, 'c>(
+    dmap: &'a DirstateMap,
+    matcher: &'b (impl Matcher),
     root_dir: impl AsRef<Path> + Sync + Send + Copy,
     list_clean: bool,
     last_normal_time: i64,
     check_exec: bool,
-) -> std::io::Result<(Vec<&HgPath>, StatusResult)> {
-    let results: std::io::Result<_> = stat_dmap_entries(
-        &dmap,
-        root_dir,
-        check_exec,
-        list_clean,
-        last_normal_time,
-    )
-    .collect();
+) -> IoResult<(Vec<&'c HgPath>, StatusResult<'c>)> {
+    let files = matcher.file_set();
+    let mut results = vec![];
+    if let Some(files) = files {
+        results.par_extend(walk_explicit(
+            &files,
+            &dmap,
+            root_dir,
+            check_exec,
+            list_clean,
+            last_normal_time,
+        ));
+    }
 
-    Ok(build_response(results?))
+    if !matcher.is_exact() {
+        let stat_results = stat_dmap_entries(
+            &dmap,
+            root_dir,
+            check_exec,
+            list_clean,
+            last_normal_time,
+        );
+        results.par_extend(stat_results);
+    }
+
+    build_response(results)
 }
--- a/rust/hg-cpython/src/dirstate/status.rs	Fri Nov 29 18:54:06 2019 +0100
+++ b/rust/hg-cpython/src/dirstate/status.rs	Fri Nov 29 17:29:06 2019 +0100
@@ -17,6 +17,7 @@
 };
 use hg::utils::files::get_path_from_bytes;
 
+use hg::matchers::AlwaysMatcher;
 use hg::status;
 use hg::utils::hg_path::HgPath;
 
@@ -53,9 +54,19 @@
     let dmap: DirstateMap = dmap.to_py_object(py);
     let dmap = dmap.get_inner(py);
 
-    let (lookup, status_res) =
-        status(&dmap, &root_dir, list_clean, last_normal_time, check_exec)
-            .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
+    // TODO: placeholder so the code compiles; removed in the next patch. This
+    // patch is part of a series and does not make real sense on its own.
+    let matcher = AlwaysMatcher;
+
+    let (lookup, status_res) = status(
+        &dmap,
+        &matcher,
+        &root_dir,
+        list_clean,
+        last_normal_time,
+        check_exec,
+    )
+    .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?;
 
     let modified = collect_pybytes_list(py, status_res.modified.as_ref());
     let added = collect_pybytes_list(py, status_res.added.as_ref());