changeset 43602:889ac87e8bfd

rust-status: improve status performance This change does more things in the parallel loop, refactors the file-level logic into two functions for added clarity. This bit of Rust code takes 55ms to execute on a repo where the stat'ing part of Valentin's fast path takes 40ms. While the code differs a bit and it's hard to get an exact measurement of how much of a performance impact it has, I can be fairly certain that this implementation is *at worse* twice as slow. Differential Revision: https://phab.mercurial-scm.org/D7254
author Raphaël Gomès <rgomes@octobus.net>
date Wed, 06 Nov 2019 13:43:18 +0100
parents a80d5ddecc2d
children 75fe6e71ddb8
files rust/hg-core/src/dirstate/status.rs
diffstat 1 files changed, 139 insertions(+), 102 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs	Sat Nov 09 12:55:56 2019 +0900
+++ b/rust/hg-core/src/dirstate/status.rs	Wed Nov 06 13:43:18 2019 +0100
@@ -10,22 +10,120 @@
 //! and will only be triggered in narrow cases.
 
 use crate::utils::files::HgMetadata;
-use crate::utils::hg_path::{hg_path_to_path_buf, HgPathBuf};
-use crate::{DirstateEntry, DirstateMap, EntryState};
+use crate::utils::hg_path::{hg_path_to_path_buf, HgPath};
+use crate::{CopyMap, DirstateEntry, DirstateMap, EntryState};
 use rayon::prelude::*;
 use std::path::Path;
 
-// Stat all entries in the `DirstateMap` and return their new metadata.
-pub fn stat_dmap_entries(
+/// Marker enum used to dispatch new status entries into the right collections.
+/// Is similar to `crate::EntryState`, but represents the transient state of
+/// entries during the lifetime of a command.
+enum Dispatch {
+    Unsure,
+    Modified,
+    Added,
+    Removed,
+    Deleted,
+    Clean,
+    Unknown,
+}
+
+/// The file corresponding to the dirstate entry was found on the filesystem.
+fn dispatch_found(
+    filename: impl AsRef<HgPath>,
+    entry: DirstateEntry,
+    metadata: HgMetadata,
+    copy_map: &CopyMap,
+    check_exec: bool,
+    list_clean: bool,
+    last_normal_time: i64,
+) -> Dispatch {
+    let DirstateEntry {
+        state,
+        mode,
+        mtime,
+        size,
+    } = entry;
+
+    let HgMetadata {
+        st_mode,
+        st_size,
+        st_mtime,
+        ..
+    } = metadata;
+
+    match state {
+        EntryState::Normal => {
+            // Dates and times that are outside the 31-bit signed
+            // range are compared modulo 2^31. This should prevent
+            // it from behaving badly with very large files or
+            // corrupt dates while still having a high probability
+            // of detecting changes. (issue2608)
+            let range_mask = 0x7fffffff;
+
+            let size_changed = (size != st_size as i32)
+                && size != (st_size as i32 & range_mask);
+            let mode_changed =
+                (mode ^ st_mode as i32) & 0o100 != 0o000 && check_exec;
+            if size >= 0
+                            && (size_changed || mode_changed)
+                            || size == -2  // other parent
+                            || copy_map.contains_key(filename.as_ref())
+            {
+                Dispatch::Modified
+            } else if mtime != st_mtime as i32
+                && mtime != (st_mtime as i32 & range_mask)
+            {
+                Dispatch::Unsure
+            } else if st_mtime == last_normal_time {
+                // the file may have just been marked as normal and
+                // it may have changed in the same second without
+                // changing its size. This can happen if we quickly
+                // do multiple commits. Force lookup, so we don't
+                // miss such a racy file change.
+                Dispatch::Unsure
+            } else if list_clean {
+                Dispatch::Clean
+            } else {
+                Dispatch::Unknown
+            }
+        }
+        EntryState::Merged => Dispatch::Modified,
+        EntryState::Added => Dispatch::Added,
+        EntryState::Removed => Dispatch::Removed,
+        EntryState::Unknown => Dispatch::Unknown,
+    }
+}
+
+/// The file corresponding to this Dirstate entry is missing.
+fn dispatch_missing(state: EntryState) -> Dispatch {
+    match state {
+        // File was removed from the filesystem during commands
+        EntryState::Normal | EntryState::Merged | EntryState::Added => {
+            Dispatch::Deleted
+        }
+        // File was removed, everything is normal
+        EntryState::Removed => Dispatch::Removed,
+        // File is unknown to Mercurial, everything is normal
+        EntryState::Unknown => Dispatch::Unknown,
+    }
+}
+
+/// Stat all entries in the `DirstateMap` and mark them for dispatch into
+/// the relevant collections.
+fn stat_dmap_entries(
     dmap: &DirstateMap,
     root_dir: impl AsRef<Path> + Sync,
-) -> std::io::Result<Vec<(HgPathBuf, Option<HgMetadata>)>> {
+    check_exec: bool,
+    list_clean: bool,
+    last_normal_time: i64,
+) -> std::io::Result<Vec<(&HgPath, Dispatch)>> {
     dmap.par_iter()
         .filter_map(
             // Getting file metadata is costly, so we don't do it if the
             // file is already present in the results, hence `filter_map`
-            |(filename, _)| -> Option<
-                std::io::Result<(HgPathBuf, Option<HgMetadata>)>
+            |(filename, entry)| -> Option<
+                std::io::Result<(&HgPath, Dispatch)>
             > {
                 let meta = match hg_path_to_path_buf(filename) {
                     Ok(p) => root_dir.as_ref().join(p).symlink_metadata(),
@@ -37,12 +135,16 @@
                         if !(m.file_type().is_file()
                             || m.file_type().is_symlink()) =>
                     {
-                        Ok((filename.to_owned(), None))
+                        Ok((filename, dispatch_missing(entry.state)))
                     }
-                    Ok(m) => Ok((
-                        filename.to_owned(),
-                        Some(HgMetadata::from_metadata(m)),
-                    )),
+                    Ok(m) => Ok((filename, dispatch_found(
+                            filename,
+                            *entry,
+                            HgMetadata::from_metadata(m),
+                            &dmap.copy_map,
+                            check_exec,
+                            list_clean,
+                            last_normal_time))),
                     Err(ref e)
                         if e.kind() == std::io::ErrorKind::NotFound
                             || e.raw_os_error() == Some(20) =>
@@ -51,7 +153,7 @@
                         // `NotADirectory` (errno 20)
                         // It happens if the dirstate contains `foo/bar` and
                         // foo is not a directory
-                        Ok((filename.to_owned(), None))
+                        Ok((filename, dispatch_missing(entry.state)))
                     }
                     Err(e) => Err(e),
                 })
@@ -60,23 +162,19 @@
         .collect()
 }
 
-pub struct StatusResult {
-    pub modified: Vec<HgPathBuf>,
-    pub added: Vec<HgPathBuf>,
-    pub removed: Vec<HgPathBuf>,
-    pub deleted: Vec<HgPathBuf>,
-    pub clean: Vec<HgPathBuf>,
+pub struct StatusResult<'a> {
+    pub modified: Vec<&'a HgPath>,
+    pub added: Vec<&'a HgPath>,
+    pub removed: Vec<&'a HgPath>,
+    pub deleted: Vec<&'a HgPath>,
+    pub clean: Vec<&'a HgPath>,
     // TODO ignored
     // TODO unknown
 }
 
 fn build_response(
-    dmap: &DirstateMap,
-    list_clean: bool,
-    last_normal_time: i64,
-    check_exec: bool,
-    results: Vec<(HgPathBuf, Option<HgMetadata>)>,
-) -> (Vec<HgPathBuf>, StatusResult) {
+    results: Vec<(&HgPath, Dispatch)>,
+) -> (Vec<&HgPath>, StatusResult) {
     let mut lookup = vec![];
     let mut modified = vec![];
     let mut added = vec![];
@@ -84,76 +182,15 @@
     let mut deleted = vec![];
     let mut clean = vec![];
 
-    for (filename, metadata_option) in results.into_iter() {
-        let DirstateEntry {
-            state,
-            mode,
-            mtime,
-            size,
-        } = match dmap.get(&filename) {
-            None => {
-                continue;
-            }
-            Some(e) => *e,
-        };
-
-        match metadata_option {
-            None => {
-                match state {
-                    EntryState::Normal
-                    | EntryState::Merged
-                    | EntryState::Added => deleted.push(filename),
-                    EntryState::Removed => removed.push(filename),
-                    _ => {}
-                };
-            }
-            Some(HgMetadata {
-                st_mode,
-                st_size,
-                st_mtime,
-                ..
-            }) => {
-                match state {
-                    EntryState::Normal => {
-                        // Dates and times that are outside the 31-bit signed
-                        // range are compared modulo 2^31. This should prevent
-                        // it from behaving badly with very large files or
-                        // corrupt dates while still having a high probability
-                        // of detecting changes. (issue2608)
-                        let range_mask = 0x7fffffff;
-
-                        let size_changed = (size != st_size as i32)
-                            && size != (st_size as i32 & range_mask);
-                        let mode_changed = (mode ^ st_mode as i32) & 0o100
-                            != 0o000
-                            && check_exec;
-                        if size >= 0
-                            && (size_changed || mode_changed)
-                            || size == -2  // other parent
-                            || dmap.copy_map.contains_key(&filename)
-                        {
-                            modified.push(filename);
-                        } else if mtime != st_mtime as i32
-                            && mtime != (st_mtime as i32 & range_mask)
-                        {
-                            lookup.push(filename);
-                        } else if st_mtime == last_normal_time {
-                            // the file may have just been marked as normal and
-                            // it may have changed in the same second without
-                            // changing its size. This can happen if we quickly
-                            // do multiple commits. Force lookup, so we don't
-                            // miss such a racy file change.
-                            lookup.push(filename);
-                        } else if list_clean {
-                            clean.push(filename);
-                        }
-                    }
-                    EntryState::Merged => modified.push(filename),
-                    EntryState::Added => added.push(filename),
-                    EntryState::Removed => removed.push(filename),
-                    EntryState::Unknown => {}
-                }
-            }
+    for (filename, dispatch) in results.into_iter() {
+        match dispatch {
+            Dispatch::Unknown => {}
+            Dispatch::Unsure => lookup.push(filename),
+            Dispatch::Modified => modified.push(filename),
+            Dispatch::Added => added.push(filename),
+            Dispatch::Removed => removed.push(filename),
+            Dispatch::Deleted => deleted.push(filename),
+            Dispatch::Clean => clean.push(filename),
         }
     }
 
@@ -175,14 +212,14 @@
     list_clean: bool,
     last_normal_time: i64,
     check_exec: bool,
-) -> std::io::Result<(Vec<HgPathBuf>, StatusResult)> {
-    let results = stat_dmap_entries(&dmap, root_dir)?;
-
-    Ok(build_response(
+) -> std::io::Result<(Vec<&HgPath>, StatusResult)> {
+    let results = stat_dmap_entries(
         &dmap,
+        root_dir,
+        check_exec,
         list_clean,
         last_normal_time,
-        check_exec,
-        results,
-    ))
+    )?;
+
+    Ok(build_response(results))
 }