rust-status: collect traversed directories if required
authorRaphaël Gomès <rgomes@octobus.net>
Tue, 12 May 2020 11:36:52 +0200
changeset 44838 c802ec4f7196
parent 44837 dc60ba32d43b
child 44839 01afda7e7d6c
rust-status: collect traversed directories if required Some commands (`hg purge` notably) register the `traversedir` callback on their matcher to run said callback every time a directory is traversed. This is the first of three patches, further broadening Rust support for status. Unfortunately, there is no way around collecting a full `Vec` (or any other owned datastructure, like a radix tree) and pushing it back up the Python layer since keeping the Python callback in a closure would mean giving up multithreading because of the GIL, which is obviously unacceptable. Performance is still a lot better than the Python+C path. Running `hg clean/purge` on Netbeans' repo (100k files): ``` | No-op | 30% unknown -------------------------- Rust | 1.0s | 1.67s C | 2.0s | 2.87s ``` Differential Revision: https://phab.mercurial-scm.org/D8518
rust/hg-core/src/dirstate/status.rs
--- a/rust/hg-core/src/dirstate/status.rs	Tue May 12 12:41:28 2020 +0200
+++ b/rust/hg-core/src/dirstate/status.rs	Tue May 12 11:36:52 2020 +0200
@@ -221,6 +221,7 @@
     dmap: &'a DirstateMap,
     root_dir: impl AsRef<Path> + Sync + Send + 'a,
     options: StatusOptions,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> {
     files
         .unwrap_or(&DEFAULT_WORK)
@@ -255,6 +256,13 @@
                         Some(Ok((normalized, Dispatch::Unknown)))
                     } else {
                         if file_type.is_dir() {
+                            if options.collect_traversed_dirs {
+                                // The receiver always outlives the sender,
+                                // so unwrap.
+                                traversed_sender
+                                    .send(normalized.to_owned())
+                                    .unwrap()
+                            }
                             Some(Ok((
                                 normalized,
                                 Dispatch::Directory {
@@ -302,6 +310,9 @@
     pub list_clean: bool,
     pub list_unknown: bool,
     pub list_ignored: bool,
+    /// Whether to collect traversed dirs for applying a callback later.
+    /// Used by `hg purge` for example.
+    pub collect_traversed_dirs: bool,
 }
 
 /// Dispatch a single entry (file, folder, symlink...) found during `traverse`.
@@ -319,6 +330,7 @@
     options: StatusOptions,
     filename: HgPathBuf,
     dir_entry: DirEntry,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> IoResult<()> {
     let file_type = dir_entry.file_type()?;
     let entry_option = dmap.get(&filename);
@@ -341,6 +353,7 @@
             options,
             entry_option,
             filename,
+            traversed_sender,
         );
     } else if file_type.is_file() || file_type.is_symlink() {
         if let Some(entry) = entry_option {
@@ -407,6 +420,7 @@
     options: StatusOptions,
     entry_option: Option<&'a DirstateEntry>,
     directory: HgPathBuf,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) {
     scope.spawn(move |_| {
         // Nested `if` until `rust-lang/rust#53668` is stable
@@ -433,6 +447,7 @@
                 ignore_fn,
                 dir_ignore_fn,
                 options,
+                traversed_sender,
             )
             .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap())
         }
@@ -451,9 +466,15 @@
     ignore_fn: &IgnoreFnType,
     dir_ignore_fn: &IgnoreFnType,
     options: StatusOptions,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> IoResult<()> {
     let directory = directory.as_ref();
 
+    if options.collect_traversed_dirs {
+        // The receiver always outlives the sender, so unwrap.
+        traversed_sender.send(directory.to_owned()).unwrap()
+    }
+
     let visit_entries = match matcher.visit_children_set(directory) {
         VisitChildrenSet::Empty => return Ok(()),
         VisitChildrenSet::This | VisitChildrenSet::Recursive => None,
@@ -510,6 +531,7 @@
                     options,
                     filename,
                     dir_entry,
+                    traversed_sender.clone(),
                 )?;
             }
         }
@@ -533,6 +555,7 @@
     dir_ignore_fn: &IgnoreFnType,
     options: StatusOptions,
     results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>,
+    traversed_sender: crossbeam::Sender<HgPathBuf>,
 ) -> IoResult<()> {
     let root_dir = root_dir.as_ref();
 
@@ -550,6 +573,7 @@
         &ignore_fn,
         &dir_ignore_fn,
         options,
+        traversed_sender,
     )?;
 
     // Disconnect the channel so the receiver stops waiting
@@ -640,11 +664,14 @@
     pub ignored: Vec<Cow<'a, HgPath>>,
     pub unknown: Vec<Cow<'a, HgPath>>,
     pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>,
+    /// Only filled if `collect_traversed_dirs` is `true`
+    pub traversed: Vec<HgPathBuf>,
 }
 
 #[timed]
 fn build_response<'a>(
     results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>,
+    traversed: Vec<HgPathBuf>,
 ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) {
     let mut lookup = vec![];
     let mut modified = vec![];
@@ -683,6 +710,7 @@
             ignored,
             unknown,
             bad,
+            traversed,
         },
     )
 }
@@ -849,8 +877,17 @@
 
     let files = matcher.file_set();
 
+    // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not.
+    let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded();
+
     // Step 1: check the files explicitly mentioned by the user
-    let explicit = walk_explicit(files, &dmap, root_dir, options);
+    let explicit = walk_explicit(
+        files,
+        &dmap,
+        root_dir,
+        options,
+        traversed_sender.clone(),
+    );
 
     // Collect results into a `Vec` because we do very few lookups in most
     // cases.
@@ -888,6 +925,7 @@
                             &dir_ignore_fn,
                             options,
                             &mut results,
+                            traversed_sender.clone(),
                         )?;
                     }
                 }
@@ -911,5 +949,9 @@
         }
     }
 
-    Ok((build_response(results), warnings))
+    // Close the channel
+    drop(traversed_sender);
+    let traversed_dirs = traversed_recv.into_iter().collect();
+
+    Ok((build_response(results, traversed_dirs), warnings))
 }