Mercurial > hg
changeset 44838:c802ec4f7196
rust-status: collect traversed directories if required
Some commands (`hg purge` notably) register the `traversedir` callback on their
matcher to run said callback every time a directory is traversed.
This is the first of three patches, further broadening Rust support for status.
Unfortunately, there is no way around collecting a full `Vec` (or any other
owned datastructure, like a radix tree) and pushing it back up the Python layer
since keeping the Python callback in a closure would mean giving up
multithreading because of the GIL, which is obviously unacceptable.
Performance is still a lot better than the Python+C path.
Running `hg clean/purge` on Netbeans' repo (100k files):
```
| No-op | 30% unknown
--------------------------
Rust | 1.0s | 1.67s
C | 2.0s | 2.87s
```
Differential Revision: https://phab.mercurial-scm.org/D8518
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Tue, 12 May 2020 11:36:52 +0200 |
parents | dc60ba32d43b |
children | 01afda7e7d6c |
files | rust/hg-core/src/dirstate/status.rs |
diffstat | 1 files changed, 44 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs Tue May 12 12:41:28 2020 +0200 +++ b/rust/hg-core/src/dirstate/status.rs Tue May 12 11:36:52 2020 +0200 @@ -221,6 +221,7 @@ dmap: &'a DirstateMap, root_dir: impl AsRef<Path> + Sync + Send + 'a, options: StatusOptions, + traversed_sender: crossbeam::Sender<HgPathBuf>, ) -> impl ParallelIterator<Item = IoResult<(&'a HgPath, Dispatch)>> { files .unwrap_or(&DEFAULT_WORK) @@ -255,6 +256,13 @@ Some(Ok((normalized, Dispatch::Unknown))) } else { if file_type.is_dir() { + if options.collect_traversed_dirs { + // The receiver always outlives the sender, + // so unwrap. + traversed_sender + .send(normalized.to_owned()) + .unwrap() + } Some(Ok(( normalized, Dispatch::Directory { @@ -302,6 +310,9 @@ pub list_clean: bool, pub list_unknown: bool, pub list_ignored: bool, + /// Whether to collect traversed dirs for applying a callback later. + /// Used by `hg purge` for example. + pub collect_traversed_dirs: bool, } /// Dispatch a single entry (file, folder, symlink...) found during `traverse`. @@ -319,6 +330,7 @@ options: StatusOptions, filename: HgPathBuf, dir_entry: DirEntry, + traversed_sender: crossbeam::Sender<HgPathBuf>, ) -> IoResult<()> { let file_type = dir_entry.file_type()?; let entry_option = dmap.get(&filename); @@ -341,6 +353,7 @@ options, entry_option, filename, + traversed_sender, ); } else if file_type.is_file() || file_type.is_symlink() { if let Some(entry) = entry_option { @@ -407,6 +420,7 @@ options: StatusOptions, entry_option: Option<&'a DirstateEntry>, directory: HgPathBuf, + traversed_sender: crossbeam::Sender<HgPathBuf>, ) { scope.spawn(move |_| { // Nested `if` until `rust-lang/rust#53668` is stable @@ -433,6 +447,7 @@ ignore_fn, dir_ignore_fn, options, + traversed_sender, ) .unwrap_or_else(|e| files_sender.send(Err(e)).unwrap()) } @@ -451,9 +466,15 @@ ignore_fn: &IgnoreFnType, dir_ignore_fn: &IgnoreFnType, options: StatusOptions, + traversed_sender: crossbeam::Sender<HgPathBuf>, ) -> IoResult<()> { let directory = directory.as_ref(); + if options.collect_traversed_dirs { + // The receiver always outlives the sender, so unwrap. + traversed_sender.send(directory.to_owned()).unwrap() + } + let visit_entries = match matcher.visit_children_set(directory) { VisitChildrenSet::Empty => return Ok(()), VisitChildrenSet::This | VisitChildrenSet::Recursive => None, @@ -510,6 +531,7 @@ options, filename, dir_entry, + traversed_sender.clone(), )?; } } @@ -533,6 +555,7 @@ dir_ignore_fn: &IgnoreFnType, options: StatusOptions, results: &mut Vec<(Cow<'a, HgPath>, Dispatch)>, + traversed_sender: crossbeam::Sender<HgPathBuf>, ) -> IoResult<()> { let root_dir = root_dir.as_ref(); @@ -550,6 +573,7 @@ &ignore_fn, &dir_ignore_fn, options, + traversed_sender, )?; // Disconnect the channel so the receiver stops waiting @@ -640,11 +664,14 @@ pub ignored: Vec<Cow<'a, HgPath>>, pub unknown: Vec<Cow<'a, HgPath>>, pub bad: Vec<(Cow<'a, HgPath>, BadMatch)>, + /// Only filled if `collect_traversed_dirs` is `true` + pub traversed: Vec<HgPathBuf>, } #[timed] fn build_response<'a>( results: impl IntoIterator<Item = (Cow<'a, HgPath>, Dispatch)>, + traversed: Vec<HgPathBuf>, ) -> (Vec<Cow<'a, HgPath>>, DirstateStatus<'a>) { let mut lookup = vec![]; let mut modified = vec![]; @@ -683,6 +710,7 @@ ignored, unknown, bad, + traversed, }, ) } @@ -849,8 +877,17 @@ let files = matcher.file_set(); + // `crossbeam::Sender` is `Send`, while `mpsc::Sender` is not. + let (traversed_sender, traversed_recv) = crossbeam::channel::unbounded(); + // Step 1: check the files explicitly mentioned by the user - let explicit = walk_explicit(files, &dmap, root_dir, options); + let explicit = walk_explicit( + files, + &dmap, + root_dir, + options, + traversed_sender.clone(), + ); // Collect results into a `Vec` because we do very few lookups in most // cases. @@ -888,6 +925,7 @@ &dir_ignore_fn, options, &mut results, + traversed_sender.clone(), )?; } } @@ -911,5 +949,9 @@ } } - Ok((build_response(results), warnings)) + // Close the channel + drop(traversed_sender); + let traversed_dirs = traversed_recv.into_iter().collect(); + + Ok((build_response(results, traversed_dirs), warnings)) }