hg-core: make parse_dirstate return references rather than update hashmaps
Returing a vec is faster than updating a hashmap when the hashmap is not needed
like in `hg files` which just list tracked files.
Returning references avoid copying data when not needed improving performence
for large repositories.
Differential Revision: https://phab.mercurial-scm.org/D8861
--- a/rust/hg-core/src/dirstate/dirstate_map.rs Fri Aug 07 18:01:48 2020 +0530
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs Tue Aug 04 10:59:43 2020 +0200
@@ -364,11 +364,17 @@
return Ok(None);
}
- let parents = parse_dirstate(
- &mut self.state_map,
- &mut self.copy_map,
- file_contents,
- )?;
+ let (parents, entries, copies) = parse_dirstate(file_contents)?;
+ self.state_map.extend(
+ entries
+ .into_iter()
+ .map(|(path, entry)| (path.to_owned(), entry)),
+ );
+ self.copy_map.extend(
+ copies
+ .into_iter()
+ .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
+ );
if !self.dirty_parents {
self.set_parents(&parents);
--- a/rust/hg-core/src/dirstate/parsers.rs Fri Aug 07 18:01:48 2020 +0530
+++ b/rust/hg-core/src/dirstate/parsers.rs Tue Aug 04 10:59:43 2020 +0200
@@ -19,17 +19,21 @@
/// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
const MIN_ENTRY_SIZE: usize = 17;
-// TODO parse/pack: is mutate-on-loop better for performance?
+type ParseResult<'a> = (
+ DirstateParents,
+ Vec<(&'a HgPath, DirstateEntry)>,
+ Vec<(&'a HgPath, &'a HgPath)>,
+);
#[timed]
pub fn parse_dirstate(
- state_map: &mut StateMap,
- copy_map: &mut CopyMap,
contents: &[u8],
-) -> Result<DirstateParents, DirstateParseError> {
+) -> Result<ParseResult, DirstateParseError> {
if contents.len() < PARENT_SIZE * 2 {
return Err(DirstateParseError::TooLittleData);
}
+ let mut copies = vec![];
+ let mut entries = vec![];
let mut curr_pos = PARENT_SIZE * 2;
let parents = DirstateParents {
@@ -63,24 +67,21 @@
};
if let Some(copy_path) = copy {
- copy_map.insert(
- HgPath::new(path).to_owned(),
- HgPath::new(copy_path).to_owned(),
- );
+ copies.push((HgPath::new(path), HgPath::new(copy_path)));
};
- state_map.insert(
- HgPath::new(path).to_owned(),
+ entries.push((
+ HgPath::new(path),
DirstateEntry {
state,
mode,
size,
mtime,
},
- );
+ ));
curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
}
- Ok(parents)
+ Ok((parents, entries, copies))
}
/// `now` is the duration in seconds since the Unix epoch
@@ -285,14 +286,17 @@
pack_dirstate(&mut state_map, ©map, parents.clone(), now)
.unwrap();
- let mut new_state_map: StateMap = FastHashMap::default();
- let mut new_copy_map: CopyMap = FastHashMap::default();
- let new_parents = parse_dirstate(
- &mut new_state_map,
- &mut new_copy_map,
- result.as_slice(),
- )
- .unwrap();
+ let (new_parents, entries, copies) =
+ parse_dirstate(result.as_slice()).unwrap();
+ let new_state_map: StateMap = entries
+ .into_iter()
+ .map(|(path, entry)| (path.to_owned(), entry))
+ .collect();
+ let new_copy_map: CopyMap = copies
+ .into_iter()
+ .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+ .collect();
+
assert_eq!(
(parents, state_map, copymap),
(new_parents, new_state_map, new_copy_map)
@@ -360,14 +364,17 @@
pack_dirstate(&mut state_map, ©map, parents.clone(), now)
.unwrap();
- let mut new_state_map: StateMap = FastHashMap::default();
- let mut new_copy_map: CopyMap = FastHashMap::default();
- let new_parents = parse_dirstate(
- &mut new_state_map,
- &mut new_copy_map,
- result.as_slice(),
- )
- .unwrap();
+ let (new_parents, entries, copies) =
+ parse_dirstate(result.as_slice()).unwrap();
+ let new_state_map: StateMap = entries
+ .into_iter()
+ .map(|(path, entry)| (path.to_owned(), entry))
+ .collect();
+ let new_copy_map: CopyMap = copies
+ .into_iter()
+ .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+ .collect();
+
assert_eq!(
(parents, state_map, copymap),
(new_parents, new_state_map, new_copy_map)
@@ -403,14 +410,16 @@
pack_dirstate(&mut state_map, ©map, parents.clone(), now)
.unwrap();
- let mut new_state_map: StateMap = FastHashMap::default();
- let mut new_copy_map: CopyMap = FastHashMap::default();
- let new_parents = parse_dirstate(
- &mut new_state_map,
- &mut new_copy_map,
- result.as_slice(),
- )
- .unwrap();
+ let (new_parents, entries, copies) =
+ parse_dirstate(result.as_slice()).unwrap();
+ let new_state_map: StateMap = entries
+ .into_iter()
+ .map(|(path, entry)| (path.to_owned(), entry))
+ .collect();
+ let new_copy_map: CopyMap = copies
+ .into_iter()
+ .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+ .collect();
assert_eq!(
(
--- a/rust/hg-cpython/src/parsers.rs Fri Aug 07 18:01:48 2020 +0530
+++ b/rust/hg-cpython/src/parsers.rs Tue Aug 04 10:59:43 2020 +0200
@@ -14,7 +14,7 @@
PythonObject, ToPyObject,
};
use hg::{
- pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
+ pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, DirstateEntry,
DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
PARENT_SIZE,
};
@@ -29,11 +29,17 @@
copymap: PyDict,
st: PyBytes,
) -> PyResult<PyTuple> {
- let mut dirstate_map = FastHashMap::default();
- let mut copies = FastHashMap::default();
+ match parse_dirstate(st.data(py)) {
+ Ok((parents, entries, copies)) => {
+ let dirstate_map: FastHashMap<HgPathBuf, DirstateEntry> = entries
+ .into_iter()
+ .map(|(path, entry)| (path.to_owned(), entry))
+ .collect();
+ let copy_map: FastHashMap<HgPathBuf, HgPathBuf> = copies
+ .into_iter()
+ .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+ .collect();
- match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
- Ok(parents) => {
for (filename, entry) in &dirstate_map {
dmap.set_item(
py,
@@ -41,7 +47,7 @@
make_dirstate_tuple(py, entry)?,
)?;
}
- for (path, copy_path) in copies {
+ for (path, copy_path) in copy_map {
copymap.set_item(
py,
PyBytes::new(py, path.as_bytes()),