changeset 45357:27424779c5b8

hg-core: make parse_dirstate return references rather than update hashmaps Returing a vec is faster than updating a hashmap when the hashmap is not needed like in `hg files` which just list tracked files. Returning references avoid copying data when not needed improving performence for large repositories. Differential Revision: https://phab.mercurial-scm.org/D8861
author Antoine Cezar <antoine.cezar@octobus.net>
date Tue, 04 Aug 2020 10:59:43 +0200
parents f025b97f3758
children 452ece5654c5
files rust/hg-core/src/dirstate/dirstate_map.rs rust/hg-core/src/dirstate/parsers.rs rust/hg-cpython/src/parsers.rs
diffstat 3 files changed, 68 insertions(+), 47 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/dirstate_map.rs	Fri Aug 07 18:01:48 2020 +0530
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs	Tue Aug 04 10:59:43 2020 +0200
@@ -364,11 +364,17 @@
             return Ok(None);
         }
 
-        let parents = parse_dirstate(
-            &mut self.state_map,
-            &mut self.copy_map,
-            file_contents,
-        )?;
+        let (parents, entries, copies) = parse_dirstate(file_contents)?;
+        self.state_map.extend(
+            entries
+                .into_iter()
+                .map(|(path, entry)| (path.to_owned(), entry)),
+        );
+        self.copy_map.extend(
+            copies
+                .into_iter()
+                .map(|(path, copy)| (path.to_owned(), copy.to_owned())),
+        );
 
         if !self.dirty_parents {
             self.set_parents(&parents);
--- a/rust/hg-core/src/dirstate/parsers.rs	Fri Aug 07 18:01:48 2020 +0530
+++ b/rust/hg-core/src/dirstate/parsers.rs	Tue Aug 04 10:59:43 2020 +0200
@@ -19,17 +19,21 @@
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
-// TODO parse/pack: is mutate-on-loop better for performance?
+type ParseResult<'a> = (
+    DirstateParents,
+    Vec<(&'a HgPath, DirstateEntry)>,
+    Vec<(&'a HgPath, &'a HgPath)>,
+);
 
 #[timed]
 pub fn parse_dirstate(
-    state_map: &mut StateMap,
-    copy_map: &mut CopyMap,
     contents: &[u8],
-) -> Result<DirstateParents, DirstateParseError> {
+) -> Result<ParseResult, DirstateParseError> {
     if contents.len() < PARENT_SIZE * 2 {
         return Err(DirstateParseError::TooLittleData);
     }
+    let mut copies = vec![];
+    let mut entries = vec![];
 
     let mut curr_pos = PARENT_SIZE * 2;
     let parents = DirstateParents {
@@ -63,24 +67,21 @@
         };
 
         if let Some(copy_path) = copy {
-            copy_map.insert(
-                HgPath::new(path).to_owned(),
-                HgPath::new(copy_path).to_owned(),
-            );
+            copies.push((HgPath::new(path), HgPath::new(copy_path)));
         };
-        state_map.insert(
-            HgPath::new(path).to_owned(),
+        entries.push((
+            HgPath::new(path),
             DirstateEntry {
                 state,
                 mode,
                 size,
                 mtime,
             },
-        );
+        ));
         curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
     }
 
-    Ok(parents)
+    Ok((parents, entries, copies))
 }
 
 /// `now` is the duration in seconds since the Unix epoch
@@ -285,14 +286,17 @@
             pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                 .unwrap();
 
-        let mut new_state_map: StateMap = FastHashMap::default();
-        let mut new_copy_map: CopyMap = FastHashMap::default();
-        let new_parents = parse_dirstate(
-            &mut new_state_map,
-            &mut new_copy_map,
-            result.as_slice(),
-        )
-        .unwrap();
+        let (new_parents, entries, copies) =
+            parse_dirstate(result.as_slice()).unwrap();
+        let new_state_map: StateMap = entries
+            .into_iter()
+            .map(|(path, entry)| (path.to_owned(), entry))
+            .collect();
+        let new_copy_map: CopyMap = copies
+            .into_iter()
+            .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+            .collect();
+
         assert_eq!(
             (parents, state_map, copymap),
             (new_parents, new_state_map, new_copy_map)
@@ -360,14 +364,17 @@
             pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                 .unwrap();
 
-        let mut new_state_map: StateMap = FastHashMap::default();
-        let mut new_copy_map: CopyMap = FastHashMap::default();
-        let new_parents = parse_dirstate(
-            &mut new_state_map,
-            &mut new_copy_map,
-            result.as_slice(),
-        )
-        .unwrap();
+        let (new_parents, entries, copies) =
+            parse_dirstate(result.as_slice()).unwrap();
+        let new_state_map: StateMap = entries
+            .into_iter()
+            .map(|(path, entry)| (path.to_owned(), entry))
+            .collect();
+        let new_copy_map: CopyMap = copies
+            .into_iter()
+            .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+            .collect();
+
         assert_eq!(
             (parents, state_map, copymap),
             (new_parents, new_state_map, new_copy_map)
@@ -403,14 +410,16 @@
             pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
                 .unwrap();
 
-        let mut new_state_map: StateMap = FastHashMap::default();
-        let mut new_copy_map: CopyMap = FastHashMap::default();
-        let new_parents = parse_dirstate(
-            &mut new_state_map,
-            &mut new_copy_map,
-            result.as_slice(),
-        )
-        .unwrap();
+        let (new_parents, entries, copies) =
+            parse_dirstate(result.as_slice()).unwrap();
+        let new_state_map: StateMap = entries
+            .into_iter()
+            .map(|(path, entry)| (path.to_owned(), entry))
+            .collect();
+        let new_copy_map: CopyMap = copies
+            .into_iter()
+            .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+            .collect();
 
         assert_eq!(
             (
--- a/rust/hg-cpython/src/parsers.rs	Fri Aug 07 18:01:48 2020 +0530
+++ b/rust/hg-cpython/src/parsers.rs	Tue Aug 04 10:59:43 2020 +0200
@@ -14,7 +14,7 @@
     PythonObject, ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf,
+    pack_dirstate, parse_dirstate, utils::hg_path::HgPathBuf, DirstateEntry,
     DirstatePackError, DirstateParents, DirstateParseError, FastHashMap,
     PARENT_SIZE,
 };
@@ -29,11 +29,17 @@
     copymap: PyDict,
     st: PyBytes,
 ) -> PyResult<PyTuple> {
-    let mut dirstate_map = FastHashMap::default();
-    let mut copies = FastHashMap::default();
+    match parse_dirstate(st.data(py)) {
+        Ok((parents, entries, copies)) => {
+            let dirstate_map: FastHashMap<HgPathBuf, DirstateEntry> = entries
+                .into_iter()
+                .map(|(path, entry)| (path.to_owned(), entry))
+                .collect();
+            let copy_map: FastHashMap<HgPathBuf, HgPathBuf> = copies
+                .into_iter()
+                .map(|(path, copy)| (path.to_owned(), copy.to_owned()))
+                .collect();
 
-    match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
-        Ok(parents) => {
             for (filename, entry) in &dirstate_map {
                 dmap.set_item(
                     py,
@@ -41,7 +47,7 @@
                     make_dirstate_tuple(py, entry)?,
                 )?;
             }
-            for (path, copy_path) in copies {
+            for (path, copy_path) in copy_map {
                 copymap.set_item(
                     py,
                     PyBytes::new(py, path.as_bytes()),