Mercurial > hg
changeset 47117:60d852ae7e7b
dirstate-tree: Paralellize the status algorithm with Rayon
The `rayon` crate exposes "parallel iterators" that work like normal iterators
but dispatch work on different items to an implicit global thread pool.
Differential Revision: https://phab.mercurial-scm.org/D10551
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Tue, 27 Apr 2021 14:20:48 +0200 |
parents | 04bcba539c96 |
children | c92e63762573 |
files | rust/hg-core/src/dirstate_tree/status.rs |
diffstat | 1 files changed, 56 insertions(+), 38 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/status.rs Tue Apr 27 12:42:21 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/status.rs Tue Apr 27 14:20:48 2021 +0200 @@ -13,10 +13,12 @@ use crate::PatternFileWarning; use crate::StatusError; use crate::StatusOptions; +use rayon::prelude::*; use std::borrow::Cow; use std::io; use std::path::Path; use std::path::PathBuf; +use std::sync::Mutex; /// Returns the status of the working directory compared to its parent /// changeset. @@ -41,11 +43,11 @@ (Box::new(|&_| true), vec![]) }; - let mut common = StatusCommon { + let common = StatusCommon { options, matcher, ignore_fn, - outcome: DirstateStatus::default(), + outcome: Mutex::new(DirstateStatus::default()), }; let is_at_repo_root = true; let hg_path = HgPath::new(""); @@ -57,7 +59,7 @@ &root_dir, is_at_repo_root, ); - Ok((common.outcome, warnings)) + Ok((common.outcome.into_inner().unwrap(), warnings)) } /// Bag of random things needed by various parts of the algorithm. Reduces the @@ -66,12 +68,12 @@ options: StatusOptions, matcher: &'a (dyn Matcher + Sync), ignore_fn: IgnoreFnType<'a>, - outcome: DirstateStatus<'tree>, + outcome: Mutex<DirstateStatus<'tree>>, } impl<'tree, 'a> StatusCommon<'tree, 'a> { fn read_dir( - &mut self, + &self, hg_path: &HgPath, fs_path: &Path, is_at_repo_root: bool, @@ -79,13 +81,15 @@ DirEntry::read_dir(fs_path, is_at_repo_root).map_err(|error| { let errno = error.raw_os_error().expect("expected real OS error"); self.outcome + .lock() + .unwrap() .bad .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) }) } fn traverse_fs_directory_and_dirstate( - &mut self, + &self, has_ignored_ancestor: bool, dirstate_nodes: &'tree mut ChildNodes, directory_hg_path: &'tree HgPath, @@ -104,19 +108,22 @@ // `merge_join_by` requires both its input iterators to be sorted: + // // * `BTreeMap` iterates according to keys’ ordering by definition // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: // https://github.com/rust-lang/rust/issues/34162 fs_entries.sort_unstable_by(|e1, e2| e1.base_name.cmp(&e2.base_name)); - for pair in itertools::merge_join_by( + itertools::merge_join_by( dirstate_nodes, &fs_entries, |(full_path, _node), fs_entry| { full_path.base_name().cmp(&fs_entry.base_name) }, - ) { + ) + .par_bridge() + .for_each(|pair| { use itertools::EitherOrBoth::*; match pair { Both((hg_path, dirstate_node), fs_entry) => { @@ -137,11 +144,11 @@ fs_entry, ), } - } + }) } fn traverse_fs_and_dirstate( - &mut self, + &self, fs_entry: &DirEntry, hg_path: &'tree HgPath, dirstate_node: &'tree mut Node, @@ -160,7 +167,7 @@ } if file_type.is_dir() { if self.options.collect_traversed_dirs { - self.outcome.traversed.push(hg_path.into()) + self.outcome.lock().unwrap().traversed.push(hg_path.into()) } let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); let is_at_repo_root = false; @@ -177,14 +184,20 @@ if let Some(entry) = &dirstate_node.entry { match entry.state { EntryState::Added => { - self.outcome.added.push(full_path) + self.outcome.lock().unwrap().added.push(full_path) } - EntryState::Removed => { - self.outcome.removed.push(full_path) - } - EntryState::Merged => { - self.outcome.modified.push(full_path) - } + EntryState::Removed => self + .outcome + .lock() + .unwrap() + .removed + .push(full_path), + EntryState::Merged => self + .outcome + .lock() + .unwrap() + .modified + .push(full_path), EntryState::Normal => { self.handle_normal_file( full_path, @@ -219,7 +232,7 @@ /// A file with `EntryState::Normal` in the dirstate was found in the /// filesystem fn handle_normal_file( - &mut self, + &self, full_path: Cow<'tree, HgPath>, dirstate_node: &Node, entry: &crate::DirstateEntry, @@ -243,34 +256,39 @@ { // issue6456: Size returned may be longer due to encryption // on EXT-4 fscrypt. TODO maybe only do it on EXT4? - self.outcome.unsure.push(full_path) + self.outcome.lock().unwrap().unsure.push(full_path) } else if dirstate_node.copy_source.is_some() || entry.is_from_other_parent() || (entry.size >= 0 && (size_changed || mode_changed())) { - self.outcome.modified.push(full_path) + self.outcome.lock().unwrap().modified.push(full_path) } else { let mtime = mtime_seconds(&fs_entry.metadata); if truncate_i64(mtime) != entry.mtime || mtime == self.options.last_normal_time { - self.outcome.unsure.push(full_path) + self.outcome.lock().unwrap().unsure.push(full_path) } else if self.options.list_clean { - self.outcome.clean.push(full_path) + self.outcome.lock().unwrap().clean.push(full_path) } } } /// A node in the dirstate tree has no corresponding filesystem entry fn traverse_dirstate_only( - &mut self, + &self, hg_path: &'tree HgPath, dirstate_node: &'tree mut Node, ) { self.mark_removed_or_deleted_if_file(hg_path, dirstate_node.state()); - for (child_hg_path, child_node) in &mut dirstate_node.children { - self.traverse_dirstate_only(child_hg_path.full_path(), child_node) - } + dirstate_node.children.par_iter_mut().for_each( + |(child_hg_path, child_node)| { + self.traverse_dirstate_only( + child_hg_path.full_path(), + child_node, + ) + }, + ) } /// A node in the dirstate tree has no corresponding *file* on the @@ -278,16 +296,16 @@ /// /// Does nothing on a "directory" node fn mark_removed_or_deleted_if_file( - &mut self, + &self, hg_path: &'tree HgPath, dirstate_node_state: Option<EntryState>, ) { if let Some(state) = dirstate_node_state { if self.matcher.matches(hg_path) { if let EntryState::Removed = state { - self.outcome.removed.push(hg_path.into()) + self.outcome.lock().unwrap().removed.push(hg_path.into()) } else { - self.outcome.deleted.push(hg_path.into()) + self.outcome.lock().unwrap().deleted.push(hg_path.into()) } } } @@ -295,7 +313,7 @@ /// Something in the filesystem has no corresponding dirstate node fn traverse_fs_only( - &mut self, + &self, has_ignored_ancestor: bool, directory_hg_path: &HgPath, fs_entry: &DirEntry, @@ -321,17 +339,17 @@ &fs_entry.full_path, is_at_repo_root, ) { - for child_fs_entry in children_fs_entries { + children_fs_entries.par_iter().for_each(|child_fs_entry| { self.traverse_fs_only( is_ignored, &hg_path, - &child_fs_entry, + child_fs_entry, ) - } + }) } } if self.options.collect_traversed_dirs { - self.outcome.traversed.push(hg_path.into()) + self.outcome.lock().unwrap().traversed.push(hg_path.into()) } } else if file_or_symlink && self.matcher.matches(&hg_path) { self.mark_unknown_or_ignored(has_ignored_ancestor, hg_path.into()) @@ -339,18 +357,18 @@ } fn mark_unknown_or_ignored( - &mut self, + &self, has_ignored_ancestor: bool, hg_path: Cow<'tree, HgPath>, ) { let is_ignored = has_ignored_ancestor || (self.ignore_fn)(&hg_path); if is_ignored { if self.options.list_ignored { - self.outcome.ignored.push(hg_path) + self.outcome.lock().unwrap().ignored.push(hg_path) } } else { if self.options.list_unknown { - self.outcome.unknown.push(hg_path) + self.outcome.lock().unwrap().unknown.push(hg_path) } } }