Mercurial > hg
changeset 45357:27424779c5b8
hg-core: make parse_dirstate return references rather than update hashmaps
Returing a vec is faster than updating a hashmap when the hashmap is not needed
like in `hg files` which just list tracked files.
Returning references avoid copying data when not needed improving performence
for large repositories.
Differential Revision: https://phab.mercurial-scm.org/D8861
author | Antoine Cezar <antoine.cezar@octobus.net> |
---|---|
date | Tue, 04 Aug 2020 10:59:43 +0200 |
parents | f025b97f3758 |
children | 452ece5654c5 |
files | rust/hg-core/src/dirstate/dirstate_map.rs rust/hg-core/src/dirstate/parsers.rs rust/hg-cpython/src/parsers.rs |
diffstat | 3 files changed, 68 insertions(+), 47 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/dirstate_map.rs Fri Aug 07 18:01:48 2020 +0530 +++ b/rust/hg-core/src/dirstate/dirstate_map.rs Tue Aug 04 10:59:43 2020 +0200 @@ -364,11 +364,17 @@ return Ok(None); } - let parents = parse_dirstate( - &mut self.state_map, - &mut self.copy_map, - file_contents, - )?; + let (parents, entries, copies) = parse_dirstate(file_contents)?; + self.state_map.extend( + entries + .into_iter() + .map(|(path, entry)| (path.to_owned(), entry)), + ); + self.copy_map.extend( + copies + .into_iter() + .map(|(path, copy)| (path.to_owned(), copy.to_owned())), + ); if !self.dirty_parents { self.set_parents(&parents);
--- a/rust/hg-core/src/dirstate/parsers.rs Fri Aug 07 18:01:48 2020 +0530 +++ b/rust/hg-core/src/dirstate/parsers.rs Tue Aug 04 10:59:43 2020 +0200 @@ -19,17 +19,21 @@ /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits. const MIN_ENTRY_SIZE: usize = 17; -// TODO parse/pack: is mutate-on-loop better for performance? +type ParseResult<'a> = ( + DirstateParents, + Vec<(&'a HgPath, DirstateEntry)>, + Vec<(&'a HgPath, &'a HgPath)>, +); #[timed] pub fn parse_dirstate( - state_map: &mut StateMap, - copy_map: &mut CopyMap, contents: &[u8], -) -> Result<DirstateParents, DirstateParseError> { +) -> Result<ParseResult, DirstateParseError> { if contents.len() < PARENT_SIZE * 2 { return Err(DirstateParseError::TooLittleData); } + let mut copies = vec![]; + let mut entries = vec![]; let mut curr_pos = PARENT_SIZE * 2; let parents = DirstateParents { @@ -63,24 +67,21 @@ }; if let Some(copy_path) = copy { - copy_map.insert( - HgPath::new(path).to_owned(), - HgPath::new(copy_path).to_owned(), - ); + copies.push((HgPath::new(path), HgPath::new(copy_path))); }; - state_map.insert( - HgPath::new(path).to_owned(), + entries.push(( + HgPath::new(path), DirstateEntry { state, mode, size, mtime, }, - ); + )); curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); } - Ok(parents) + Ok((parents, entries, copies)) } /// `now` is the duration in seconds since the Unix epoch @@ -285,14 +286,17 @@ pack_dirstate(&mut state_map, ©map, parents.clone(), now) .unwrap(); - let mut new_state_map: StateMap = FastHashMap::default(); - let mut new_copy_map: CopyMap = FastHashMap::default(); - let new_parents = parse_dirstate( - &mut new_state_map, - &mut new_copy_map, - result.as_slice(), - ) - .unwrap(); + let (new_parents, entries, copies) = + parse_dirstate(result.as_slice()).unwrap(); + let new_state_map: StateMap = entries + .into_iter() + .map(|(path, entry)| (path.to_owned(), entry)) + .collect(); + let new_copy_map: CopyMap = copies + .into_iter() + .map(|(path, copy)| (path.to_owned(), copy.to_owned())) + .collect(); + assert_eq!( (parents, state_map, copymap), (new_parents, new_state_map, new_copy_map) @@ -360,14 +364,17 @@ pack_dirstate(&mut state_map, ©map, parents.clone(), now) .unwrap(); - let mut new_state_map: StateMap = FastHashMap::default(); - let mut new_copy_map: CopyMap = FastHashMap::default(); - let new_parents = parse_dirstate( - &mut new_state_map, - &mut new_copy_map, - result.as_slice(), - ) - .unwrap(); + let (new_parents, entries, copies) = + parse_dirstate(result.as_slice()).unwrap(); + let new_state_map: StateMap = entries + .into_iter() + .map(|(path, entry)| (path.to_owned(), entry)) + .collect(); + let new_copy_map: CopyMap = copies + .into_iter() + .map(|(path, copy)| (path.to_owned(), copy.to_owned())) + .collect(); + assert_eq!( (parents, state_map, copymap), (new_parents, new_state_map, new_copy_map) @@ -403,14 +410,16 @@ pack_dirstate(&mut state_map, ©map, parents.clone(), now) .unwrap(); - let mut new_state_map: StateMap = FastHashMap::default(); - let mut new_copy_map: CopyMap = FastHashMap::default(); - let new_parents = parse_dirstate( - &mut new_state_map, - &mut new_copy_map, - result.as_slice(), - ) - .unwrap(); + let (new_parents, entries, copies) = + parse_dirstate(result.as_slice()).unwrap(); + let new_state_map: StateMap = entries + .into_iter() + .map(|(path, entry)| (path.to_owned(), entry)) + .collect(); + let new_copy_map: CopyMap = copies + .into_iter() + .map(|(path, copy)| (path.to_owned(), copy.to_owned())) + .collect(); assert_eq!( (
--- a/rust/hg-cpython/src/parsers.rs Fri Aug 07 18:01:48 2020 +0530 +++ b/rust/hg-cpython/src/parsers.rs Tue Aug 04 10:59:43 2020 +0200 @@ -14,7 +14,7 @@ PythonObject, ToPyObject, }; use hg::{ - pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, + pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError, FastHashMap, PARENT_SIZE, }; @@ -29,11 +29,17 @@ copymap: PyDict, st: PyBytes, ) -> PyResult<PyTuple> { - let mut dirstate_map = FastHashMap::default(); - let mut copies = FastHashMap::default(); + match parse_dirstate(st.data(py)) { + Ok((parents, entries, copies)) => { + let dirstate_map: FastHashMap<HgPathBuf, DirstateEntry> = entries + .into_iter() + .map(|(path, entry)| (path.to_owned(), entry)) + .collect(); + let copy_map: FastHashMap<HgPathBuf, HgPathBuf> = copies + .into_iter() + .map(|(path, copy)| (path.to_owned(), copy.to_owned())) + .collect(); - match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) { - Ok(parents) => { for (filename, entry) in &dirstate_map { dmap.set_item( py, @@ -41,7 +47,7 @@ make_dirstate_tuple(py, entry)?, )?; } - for (path, copy_path) in copies { + for (path, copy_path) in copy_map { copymap.set_item( py, PyBytes::new(py, path.as_bytes()),