Mercurial > hg-stable
changeset 47107:787ff5d21bcd
dirstate-tree: Make Rust DirstateMap bindings go through a trait object
This changeset starts a series that adds an experiment to make status faster
by changing the dirstate (first only in memory and later also on disk) to
be shaped as a tree matching the directory structure, instead of the current
flat collection of entries. The status algorithm can then traverse this tree
dirstate at the same time as it traverses the filesystem.
We (Octobus) have made prototypes that show promising results but are prone
to bitrot. We would like to start upstreaming some experimental Rust code that
goes in this direction, but to avoid disrupting users it should only be
enabled by some run-time opt-in while keeping the existing dirstate structure
and status algorithm as-is.
The `DirstateMap` type and `status` function look like the appropriate
boundary. This adds a new trait that abstracts everything Python bindings need
and makes those bindings go through a `dyn` trait object. Later we’ll have two
implementations of this trait, and the same bindings can use either.
Differential Revision: https://phab.mercurial-scm.org/D10362
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Tue, 30 Mar 2021 14:15:23 +0200 |
parents | 102a50746bc5 |
children | e061a1df32a8 |
files | rust/hg-core/src/dirstate.rs rust/hg-core/src/dirstate/dirs_multiset.rs rust/hg-core/src/dirstate/dirstate_map.rs rust/hg-core/src/dirstate/status.rs rust/hg-core/src/dirstate_tree.rs rust/hg-core/src/dirstate_tree/dispatch.rs rust/hg-core/src/lib.rs rust/hg-core/src/operations/dirstate_status.rs rust/hg-cpython/src/dirstate/dirstate_map.rs rust/hg-cpython/src/dirstate/status.rs |
diffstat | 10 files changed, 392 insertions(+), 78 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-core/src/dirstate.rs Tue Mar 30 14:15:23 2021 +0200 @@ -9,7 +9,6 @@ use crate::revlog::Node; use crate::{utils::hg_path::HgPathBuf, FastHashMap}; use bytes_cast::{unaligned, BytesCast}; -use std::collections::hash_map; use std::convert::TryFrom; pub mod dirs_multiset; @@ -51,10 +50,12 @@ pub const SIZE_FROM_OTHER_PARENT: i32 = -2; pub type StateMap = FastHashMap<HgPathBuf, DirstateEntry>; -pub type StateMapIter<'a> = hash_map::Iter<'a, HgPathBuf, DirstateEntry>; +pub type StateMapIter<'a> = + Box<dyn Iterator<Item = (&'a HgPathBuf, &'a DirstateEntry)> + Send + 'a>; pub type CopyMap = FastHashMap<HgPathBuf, HgPathBuf>; -pub type CopyMapIter<'a> = hash_map::Iter<'a, HgPathBuf, HgPathBuf>; +pub type CopyMapIter<'a> = + Box<dyn Iterator<Item = (&'a HgPathBuf, &'a HgPathBuf)> + Send + 'a>; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum EntryState {
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-core/src/dirstate/dirs_multiset.rs Tue Mar 30 14:15:23 2021 +0200 @@ -14,7 +14,7 @@ files, hg_path::{HgPath, HgPathBuf, HgPathError}, }, - DirstateEntry, DirstateMapError, FastHashMap, StateMap, + DirstateEntry, DirstateMapError, FastHashMap, }; use std::collections::{hash_map, hash_map::Entry, HashMap, HashSet}; @@ -30,14 +30,14 @@ /// Initializes the multiset from a dirstate. /// /// If `skip_state` is provided, skips dirstate entries with equal state. - pub fn from_dirstate( - dirstate: &StateMap, + pub fn from_dirstate<'a>( + dirstate: impl IntoIterator<Item = (&'a HgPathBuf, &'a DirstateEntry)>, skip_state: Option<EntryState>, ) -> Result<Self, DirstateMapError> { let mut multiset = DirsMultiset { inner: FastHashMap::default(), }; - for (filename, DirstateEntry { state, .. }) in dirstate.iter() { + for (filename, DirstateEntry { state, .. }) in dirstate { // This `if` is optimized out of the loop if let Some(skip) = skip_state { if skip != *state { @@ -207,6 +207,7 @@ #[cfg(test)] mod tests { use super::*; + use crate::StateMap; #[test] fn test_delete_path_path_not_found() { @@ -356,7 +357,7 @@ }; assert_eq!(expected, new); - let input_map = ["b/x", "a/c", "a/d/x"] + let input_map: HashMap<_, _> = ["b/x", "a/c", "a/d/x"] .iter() .map(|f| { ( @@ -384,7 +385,7 @@ #[test] fn test_dirsmultiset_new_skip() { - let input_map = [ + let input_map: HashMap<_, _> = [ ("a/", EntryState::Normal), ("a/b", EntryState::Normal), ("a/c", EntryState::Removed),
--- a/rust/hg-core/src/dirstate/dirstate_map.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-core/src/dirstate/dirstate_map.rs Tue Mar 30 14:15:23 2021 +0200 @@ -289,8 +289,10 @@ /// good idea. pub fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { if self.all_dirs.is_none() { - self.all_dirs = - Some(DirsMultiset::from_dirstate(&self.state_map, None)?); + self.all_dirs = Some(DirsMultiset::from_dirstate( + self.state_map.iter(), + None, + )?); } Ok(()) }
--- a/rust/hg-core/src/dirstate/status.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-core/src/dirstate/status.rs Tue Mar 30 14:15:23 2021 +0200 @@ -292,7 +292,7 @@ /// Gives information about which files are changed in the working directory /// and how, compared to the revision we're based on -pub struct Status<'a, M: Matcher + Sync> { +pub struct Status<'a, M: ?Sized + Matcher + Sync> { dmap: &'a DirstateMap, pub(crate) matcher: &'a M, root_dir: PathBuf, @@ -302,7 +302,7 @@ impl<'a, M> Status<'a, M> where - M: Matcher + Sync, + M: ?Sized + Matcher + Sync, { pub fn new( dmap: &'a DirstateMap, @@ -898,7 +898,7 @@ #[timed] pub fn status<'a>( dmap: &'a DirstateMap, - matcher: &'a (impl Matcher + Sync), + matcher: &'a (dyn Matcher + Sync), root_dir: PathBuf, ignore_files: Vec<PathBuf>, options: StatusOptions,
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/dirstate_tree.rs Tue Mar 30 14:15:23 2021 +0200 @@ -0,0 +1,1 @@ +pub mod dispatch;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs Tue Mar 30 14:15:23 2021 +0200 @@ -0,0 +1,310 @@ +use std::collections::HashSet; +use std::path::PathBuf; +use std::time::Duration; + +use crate::matchers::Matcher; +use crate::utils::hg_path::{HgPath, HgPathBuf}; +use crate::CopyMapIter; +use crate::DirstateEntry; +use crate::DirstateError; +use crate::DirstateMap; +use crate::DirstateMapError; +use crate::DirstateParents; +use crate::DirstateStatus; +use crate::EntryState; +use crate::FastHashMap; +use crate::HgPathCow; +use crate::PatternFileWarning; +use crate::StateMapIter; +use crate::StatusError; +use crate::StatusOptions; + +pub trait DirstateMapMethods { + fn clear(&mut self); + + fn add_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + entry: DirstateEntry, + ) -> Result<(), DirstateMapError>; + + fn remove_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + size: i32, + ) -> Result<(), DirstateMapError>; + + fn drop_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + ) -> Result<bool, DirstateMapError>; + + fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32); + + fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool; + + fn non_normal_entries_union( + &mut self, + other: HashSet<HgPathBuf>, + ) -> Vec<HgPathBuf>; + + fn set_non_normal_other_parent_entries(&mut self, force: bool); + + fn get_non_normal_other_parent_entries_panic( + &self, + ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>); + + fn get_non_normal_other_parent_entries( + &mut self, + ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>); + + fn has_tracked_dir( + &mut self, + directory: &HgPath, + ) -> Result<bool, DirstateMapError>; + + fn has_dir( + &mut self, + directory: &HgPath, + ) -> Result<bool, DirstateMapError>; + + fn parents( + &mut self, + file_contents: &[u8], + ) -> Result<&DirstateParents, DirstateError>; + + fn set_parents(&mut self, parents: &DirstateParents); + + fn read<'a>( + &mut self, + file_contents: &'a [u8], + ) -> Result<Option<&'a DirstateParents>, DirstateError>; + + fn pack( + &mut self, + parents: DirstateParents, + now: Duration, + ) -> Result<Vec<u8>, DirstateError>; + + fn build_file_fold_map(&mut self) -> &FastHashMap<HgPathBuf, HgPathBuf>; + + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError>; + + fn set_dirs(&mut self) -> Result<(), DirstateMapError>; + + fn status<'a>( + &'a self, + matcher: &'a (dyn Matcher + Sync), + root_dir: PathBuf, + ignore_files: Vec<PathBuf>, + options: StatusOptions, + ) -> Result< + ( + (Vec<HgPathCow<'a>>, DirstateStatus<'a>), + Vec<PatternFileWarning>, + ), + StatusError, + >; + + fn copy_map_len(&self) -> usize; + + fn copy_map_iter(&self) -> CopyMapIter<'_>; + + fn copy_map_contains_key(&self, key: &HgPath) -> bool; + + fn copy_map_get(&self, key: &HgPath) -> Option<&HgPathBuf>; + + fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf>; + + fn copy_map_insert( + &mut self, + key: HgPathBuf, + value: HgPathBuf, + ) -> Option<HgPathBuf>; + + fn len(&self) -> usize; + + fn contains_key(&self, key: &HgPath) -> bool; + + fn get(&self, key: &HgPath) -> Option<&DirstateEntry>; + + fn iter(&self) -> StateMapIter<'_>; +} + +impl DirstateMapMethods for DirstateMap { + fn clear(&mut self) { + self.clear() + } + + fn add_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + entry: DirstateEntry, + ) -> Result<(), DirstateMapError> { + self.add_file(filename, old_state, entry) + } + + fn remove_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + size: i32, + ) -> Result<(), DirstateMapError> { + self.remove_file(filename, old_state, size) + } + + fn drop_file( + &mut self, + filename: &HgPath, + old_state: EntryState, + ) -> Result<bool, DirstateMapError> { + self.drop_file(filename, old_state) + } + + fn clear_ambiguous_times(&mut self, filenames: Vec<HgPathBuf>, now: i32) { + self.clear_ambiguous_times(filenames, now) + } + + fn non_normal_entries_remove(&mut self, key: &HgPath) -> bool { + self.non_normal_entries_remove(key) + } + + fn non_normal_entries_union( + &mut self, + other: HashSet<HgPathBuf>, + ) -> Vec<HgPathBuf> { + self.non_normal_entries_union(other) + } + + fn set_non_normal_other_parent_entries(&mut self, force: bool) { + self.set_non_normal_other_parent_entries(force) + } + + fn get_non_normal_other_parent_entries_panic( + &self, + ) -> (&HashSet<HgPathBuf>, &HashSet<HgPathBuf>) { + self.get_non_normal_other_parent_entries_panic() + } + + fn get_non_normal_other_parent_entries( + &mut self, + ) -> (&mut HashSet<HgPathBuf>, &mut HashSet<HgPathBuf>) { + self.get_non_normal_other_parent_entries() + } + + fn has_tracked_dir( + &mut self, + directory: &HgPath, + ) -> Result<bool, DirstateMapError> { + self.has_tracked_dir(directory) + } + + fn has_dir( + &mut self, + directory: &HgPath, + ) -> Result<bool, DirstateMapError> { + self.has_dir(directory) + } + + fn parents( + &mut self, + file_contents: &[u8], + ) -> Result<&DirstateParents, DirstateError> { + self.parents(file_contents) + } + + fn set_parents(&mut self, parents: &DirstateParents) { + self.set_parents(parents) + } + + fn read<'a>( + &mut self, + file_contents: &'a [u8], + ) -> Result<Option<&'a DirstateParents>, DirstateError> { + self.read(file_contents) + } + + fn pack( + &mut self, + parents: DirstateParents, + now: Duration, + ) -> Result<Vec<u8>, DirstateError> { + self.pack(parents, now) + } + + fn build_file_fold_map(&mut self) -> &FastHashMap<HgPathBuf, HgPathBuf> { + self.build_file_fold_map() + } + + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { + self.set_all_dirs() + } + + fn set_dirs(&mut self) -> Result<(), DirstateMapError> { + self.set_dirs() + } + + fn status<'a>( + &'a self, + matcher: &'a (dyn Matcher + Sync), + root_dir: PathBuf, + ignore_files: Vec<PathBuf>, + options: StatusOptions, + ) -> Result< + ( + (Vec<HgPathCow<'a>>, DirstateStatus<'a>), + Vec<PatternFileWarning>, + ), + StatusError, + > { + crate::status(self, matcher, root_dir, ignore_files, options) + } + + fn copy_map_len(&self) -> usize { + self.copy_map.len() + } + + fn copy_map_iter(&self) -> CopyMapIter<'_> { + Box::new(self.copy_map.iter()) + } + + fn copy_map_contains_key(&self, key: &HgPath) -> bool { + self.copy_map.contains_key(key) + } + + fn copy_map_get(&self, key: &HgPath) -> Option<&HgPathBuf> { + self.copy_map.get(key) + } + + fn copy_map_remove(&mut self, key: &HgPath) -> Option<HgPathBuf> { + self.copy_map.remove(key) + } + + fn copy_map_insert( + &mut self, + key: HgPathBuf, + value: HgPathBuf, + ) -> Option<HgPathBuf> { + self.copy_map.insert(key, value) + } + + fn len(&self) -> usize { + (&**self).len() + } + + fn contains_key(&self, key: &HgPath) -> bool { + (&**self).contains_key(key) + } + + fn get(&self, key: &HgPath) -> Option<&DirstateEntry> { + (&**self).get(key) + } + + fn iter(&self) -> StateMapIter<'_> { + Box::new((&**self).iter()) + } +}
--- a/rust/hg-core/src/lib.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-core/src/lib.rs Tue Mar 30 14:15:23 2021 +0200 @@ -9,6 +9,7 @@ pub mod errors; pub use ancestors::{AncestorsIterator, LazyAncestors, MissingAncestors}; mod dirstate; +pub mod dirstate_tree; pub mod discovery; pub mod requirements; pub mod testing; // unconditionally built, for use from integration tests
--- a/rust/hg-core/src/operations/dirstate_status.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-core/src/operations/dirstate_status.rs Tue Mar 30 14:15:23 2021 +0200 @@ -14,7 +14,7 @@ /// files. pub type LookupAndStatus<'a> = (Vec<HgPathCow<'a>>, DirstateStatus<'a>); -impl<'a, M: Matcher + Sync> Status<'a, M> { +impl<'a, M: ?Sized + Matcher + Sync> Status<'a, M> { pub(crate) fn run(&self) -> Result<LookupAndStatus<'a>, StatusError> { let (traversed_sender, traversed_receiver) = crossbeam_channel::unbounded();
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Tue Mar 30 14:15:23 2021 +0200 @@ -27,6 +27,7 @@ parsers::dirstate_parents_to_pytuple, }; use hg::{ + dirstate_tree::dispatch::DirstateMapMethods, errors::HgError, revlog::Node, utils::hg_path::{HgPath, HgPathBuf}, @@ -47,10 +48,10 @@ // All attributes also have to have a separate refcount data attribute for // leaks, with all methods that go along for reference sharing. py_class!(pub class DirstateMap |py| { - @shared data inner: RustDirstateMap; + @shared data inner: Box<dyn DirstateMapMethods + Send>; def __new__(_cls, _root: PyObject) -> PyResult<Self> { - let inner = RustDirstateMap::default(); + let inner = Box::new(RustDirstateMap::default()); Self::create_instance(py, inner) } @@ -404,7 +405,7 @@ Dirs::from_inner( py, DirsMultiset::from_dirstate( - &self.inner(py).borrow(), + self.inner(py).borrow().iter(), Some(EntryState::Removed), ) .map_err(|e| { @@ -421,7 +422,7 @@ Dirs::from_inner( py, DirsMultiset::from_dirstate( - &self.inner(py).borrow(), + self.inner(py).borrow().iter(), None, ).map_err(|e| { PyErr::new::<exc::ValueError, _>(py, e.to_string()) @@ -432,7 +433,7 @@ // TODO all copymap* methods, see docstring above def copymapcopy(&self) -> PyResult<PyDict> { let dict = PyDict::new(py); - for (key, value) in self.inner(py).borrow().copy_map.iter() { + for (key, value) in self.inner(py).borrow().copy_map_iter() { dict.set_item( py, PyBytes::new(py, key.as_bytes()), @@ -444,7 +445,7 @@ def copymapgetitem(&self, key: PyObject) -> PyResult<PyBytes> { let key = key.extract::<PyBytes>(py)?; - match self.inner(py).borrow().copy_map.get(HgPath::new(key.data(py))) { + match self.inner(py).borrow().copy_map_get(HgPath::new(key.data(py))) { Some(copy) => Ok(PyBytes::new(py, copy.as_bytes())), None => Err(PyErr::new::<exc::KeyError, _>( py, @@ -457,15 +458,14 @@ } def copymaplen(&self) -> PyResult<usize> { - Ok(self.inner(py).borrow().copy_map.len()) + Ok(self.inner(py).borrow().copy_map_len()) } def copymapcontains(&self, key: PyObject) -> PyResult<bool> { let key = key.extract::<PyBytes>(py)?; Ok(self .inner(py) .borrow() - .copy_map - .contains_key(HgPath::new(key.data(py)))) + .copy_map_contains_key(HgPath::new(key.data(py)))) } def copymapget( &self, @@ -476,8 +476,7 @@ match self .inner(py) .borrow() - .copy_map - .get(HgPath::new(key.data(py))) + .copy_map_get(HgPath::new(key.data(py))) { Some(copy) => Ok(Some( PyBytes::new(py, copy.as_bytes()).into_object(), @@ -492,7 +491,7 @@ ) -> PyResult<PyObject> { let key = key.extract::<PyBytes>(py)?; let value = value.extract::<PyBytes>(py)?; - self.inner(py).borrow_mut().copy_map.insert( + self.inner(py).borrow_mut().copy_map_insert( HgPathBuf::from_bytes(key.data(py)), HgPathBuf::from_bytes(value.data(py)), ); @@ -507,8 +506,7 @@ match self .inner(py) .borrow_mut() - .copy_map - .remove(HgPath::new(key.data(py))) + .copy_map_remove(HgPath::new(key.data(py))) { Some(_) => Ok(None), None => Ok(default), @@ -519,7 +517,7 @@ let leaked_ref = self.inner(py).leak_immutable(); CopyMapKeysIterator::from_inner( py, - unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) }, + unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) }, ) } @@ -527,7 +525,7 @@ let leaked_ref = self.inner(py).leak_immutable(); CopyMapItemsIterator::from_inner( py, - unsafe { leaked_ref.map(py, |o| o.copy_map.iter()) }, + unsafe { leaked_ref.map(py, |o| o.copy_map_iter()) }, ) } @@ -537,7 +535,7 @@ pub fn get_inner<'a>( &'a self, py: Python<'a>, - ) -> Ref<'a, RustDirstateMap> { + ) -> Ref<'a, Box<dyn DirstateMapMethods + Send>> { self.inner(py).borrow() } fn translate_key(
--- a/rust/hg-cpython/src/dirstate/status.rs Wed May 05 18:26:04 2021 -0400 +++ b/rust/hg-cpython/src/dirstate/status.rs Tue Mar 30 14:15:23 2021 +0200 @@ -17,7 +17,7 @@ }; use hg::{ matchers::{AlwaysMatcher, FileMatcher, IncludeMatcher}, - parse_pattern_syntax, status, + parse_pattern_syntax, utils::{ files::{get_bytes_from_path, get_path_from_bytes}, hg_path::{HgPath, HgPathBuf}, @@ -126,21 +126,21 @@ match matcher.get_type(py).name(py).borrow() { "alwaysmatcher" => { let matcher = AlwaysMatcher; - let ((lookup, status_res), warnings) = status( - &dmap, - &matcher, - root_dir.to_path_buf(), - ignore_files, - StatusOptions { - check_exec, - last_normal_time, - list_clean, - list_ignored, - list_unknown, - collect_traversed_dirs, - }, - ) - .map_err(|e| handle_fallback(py, e))?; + let ((lookup, status_res), warnings) = dmap + .status( + &matcher, + root_dir.to_path_buf(), + ignore_files, + StatusOptions { + check_exec, + last_normal_time, + list_clean, + list_ignored, + list_unknown, + collect_traversed_dirs, + }, + ) + .map_err(|e| handle_fallback(py, e))?; build_response(py, lookup, status_res, warnings) } "exactmatcher" => { @@ -163,21 +163,21 @@ let files = files?; let matcher = FileMatcher::new(files.as_ref()) .map_err(|e| PyErr::new::<ValueError, _>(py, e.to_string()))?; - let ((lookup, status_res), warnings) = status( - &dmap, - &matcher, - root_dir.to_path_buf(), - ignore_files, - StatusOptions { - check_exec, - last_normal_time, - list_clean, - list_ignored, - list_unknown, - collect_traversed_dirs, - }, - ) - .map_err(|e| handle_fallback(py, e))?; + let ((lookup, status_res), warnings) = dmap + .status( + &matcher, + root_dir.to_path_buf(), + ignore_files, + StatusOptions { + check_exec, + last_normal_time, + list_clean, + list_ignored, + list_unknown, + collect_traversed_dirs, + }, + ) + .map_err(|e| handle_fallback(py, e))?; build_response(py, lookup, status_res, warnings) } "includematcher" => { @@ -218,21 +218,21 @@ .map_err(|e| handle_fallback(py, e.into()))?; all_warnings.extend(warnings); - let ((lookup, status_res), warnings) = status( - &dmap, - &matcher, - root_dir.to_path_buf(), - ignore_files, - StatusOptions { - check_exec, - last_normal_time, - list_clean, - list_ignored, - list_unknown, - collect_traversed_dirs, - }, - ) - .map_err(|e| handle_fallback(py, e))?; + let ((lookup, status_res), warnings) = dmap + .status( + &matcher, + root_dir.to_path_buf(), + ignore_files, + StatusOptions { + check_exec, + last_normal_time, + list_clean, + list_ignored, + list_unknown, + collect_traversed_dirs, + }, + ) + .map_err(|e| handle_fallback(py, e))?; all_warnings.extend(warnings);