rust-parsers: switch to parse/pack_dirstate to mutate-on-loop
authorRaphaël Gomès <rgomes@octobus.net>
Tue, 09 Jul 2019 11:49:49 +0200
changeset 42748 7cae6bc29ff9
parent 42747 760a7851e9ba
child 42749 7ceded4419a3
rust-parsers: switch to parse/pack_dirstate to mutate-on-loop Both `parse_dirstate` and `pack_dirstate` can operate directly on the data they're passed, which prevents the creation of intermediate data structures, simplifies the function signatures and reduces boilerplate. They are exposed directly to the Python for now, but a later patch will make use of them inside `hg-core`. Differential Revision: https://phab.mercurial-scm.org/D6628
rust/hg-core/src/dirstate.rs
rust/hg-core/src/dirstate/dirs_multiset.rs
rust/hg-core/src/dirstate/parsers.rs
rust/hg-core/src/lib.rs
rust/hg-core/src/utils.rs
rust/hg-cpython/src/dirstate.rs
rust/hg-cpython/src/dirstate/dirs_multiset.rs
rust/hg-cpython/src/parsers.rs
--- a/rust/hg-core/src/dirstate.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-core/src/dirstate.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -1,16 +1,25 @@
+// dirstate module
+//
+// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+use std::collections::HashMap;
+
 pub mod dirs_multiset;
 pub mod parsers;
 
-#[derive(Debug, PartialEq, Copy, Clone)]
-pub struct DirstateParents<'a> {
-    pub p1: &'a [u8],
-    pub p2: &'a [u8],
+#[derive(Debug, PartialEq, Clone)]
+pub struct DirstateParents {
+    pub p1: [u8; 20],
+    pub p2: [u8; 20],
 }
 
 /// The C implementation uses all signed types. This will be an issue
 /// either when 4GB+ source files are commonplace or in 2038, whichever
 /// comes first.
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, Copy, Clone)]
 pub struct DirstateEntry {
     pub state: i8,
     pub mode: i32,
@@ -18,19 +27,12 @@
     pub size: i32,
 }
 
-pub type DirstateVec = Vec<(Vec<u8>, DirstateEntry)>;
-
-#[derive(Debug, PartialEq)]
-pub struct CopyVecEntry<'a> {
-    pub path: &'a [u8],
-    pub copy_path: &'a [u8],
-}
-
-pub type CopyVec<'a> = Vec<CopyVecEntry<'a>>;
+pub type StateMap = HashMap<Vec<u8>, DirstateEntry>;
+pub type CopyMap = HashMap<Vec<u8>, Vec<u8>>;
 
 /// The Python implementation passes either a mapping (dirstate) or a flat
 /// iterable (manifest)
-pub enum DirsIterable {
-    Dirstate(DirstateVec),
-    Manifest(Vec<Vec<u8>>),
+pub enum DirsIterable<'a> {
+    Dirstate(&'a HashMap<Vec<u8>, DirstateEntry>),
+    Manifest(&'a Vec<Vec<u8>>),
 }
--- a/rust/hg-core/src/dirstate/dirs_multiset.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-core/src/dirstate/dirs_multiset.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -28,10 +28,10 @@
 
         match iterable {
             DirsIterable::Dirstate(vec) => {
-                for (ref filename, DirstateEntry { state, .. }) in vec {
+                for (filename, DirstateEntry { state, .. }) in vec {
                     // This `if` is optimized out of the loop
                     if let Some(skip) = skip_state {
-                        if skip != state {
+                        if skip != *state {
                             multiset.add_path(filename);
                         }
                     } else {
@@ -40,7 +40,7 @@
                 }
             }
             DirsIterable::Manifest(vec) => {
-                for ref filename in vec {
+                for filename in vec {
                     multiset.add_path(filename);
                 }
             }
@@ -108,10 +108,11 @@
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     #[test]
     fn test_delete_path_path_not_found() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"doesnotexist/";
         assert_eq!(
             Err(DirstateMapError::PathNotFound(path.to_vec())),
@@ -122,7 +123,7 @@
     #[test]
     fn test_delete_path_empty_path() {
         let mut map =
-            DirsMultiset::new(DirsIterable::Manifest(vec![vec![]]), None);
+            DirsMultiset::new(DirsIterable::Manifest(&vec![vec![]]), None);
         let path = b"";
         assert_eq!(Ok(()), map.delete_path(path));
         assert_eq!(
@@ -162,7 +163,7 @@
 
     #[test]
     fn test_add_path_empty_path() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
         let path = b"";
         map.add_path(path);
 
@@ -171,7 +172,7 @@
 
     #[test]
     fn test_add_path_successful() {
-        let mut map = DirsMultiset::new(DirsIterable::Manifest(vec![]), None);
+        let mut map = DirsMultiset::new(DirsIterable::Manifest(&vec![]), None);
 
         map.add_path(b"a/");
         assert_eq!(1, *map.inner.get(&b"a".to_vec()).unwrap());
@@ -218,13 +219,13 @@
     fn test_dirsmultiset_new_empty() {
         use DirsIterable::{Dirstate, Manifest};
 
-        let new = DirsMultiset::new(Manifest(vec![]), None);
+        let new = DirsMultiset::new(Manifest(&vec![]), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
         assert_eq!(expected, new);
 
-        let new = DirsMultiset::new(Dirstate(vec![]), None);
+        let new = DirsMultiset::new(Dirstate(&HashMap::new()), None);
         let expected = DirsMultiset {
             inner: HashMap::new(),
         };
@@ -244,7 +245,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), None);
+        let new = DirsMultiset::new(Manifest(&input_vec), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -269,7 +270,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), None);
+        let new = DirsMultiset::new(Dirstate(&input_map), None);
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -289,7 +290,7 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Manifest(input_vec), Some('n' as i8));
+        let new = DirsMultiset::new(Manifest(&input_vec), Some('n' as i8));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
@@ -318,11 +319,10 @@
             .map(|(k, v)| (k.as_bytes().to_vec(), *v))
             .collect();
 
-        let new = DirsMultiset::new(Dirstate(input_map), Some('n' as i8));
+        let new = DirsMultiset::new(Dirstate(&input_map), Some('n' as i8));
         let expected = DirsMultiset {
             inner: expected_inner,
         };
         assert_eq!(expected, new);
     }
-
 }
--- a/rust/hg-core/src/dirstate/parsers.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-core/src/dirstate/parsers.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -4,31 +4,35 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::{
-    CopyVec, CopyVecEntry, DirstateEntry, DirstatePackError, DirstateParents,
-    DirstateParseError, DirstateVec,
+    dirstate::{CopyMap, StateMap},
+    utils::copy_into_array,
+    DirstateEntry, DirstatePackError, DirstateParents, DirstateParseError,
 };
 use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
-use std::collections::HashMap;
+use std::convert::TryInto;
 use std::io::Cursor;
+use std::time::Duration;
 
 /// Parents are stored in the dirstate as byte hashes.
-const PARENT_SIZE: usize = 20;
+pub const PARENT_SIZE: usize = 20;
 /// Dirstate entries have a static part of 8 + 32 + 32 + 32 + 32 bits.
 const MIN_ENTRY_SIZE: usize = 17;
 
+// TODO parse/pack: is mutate-on-loop better for performance?
+
 pub fn parse_dirstate(
+    state_map: &mut StateMap,
+    copy_map: &mut CopyMap,
     contents: &[u8],
-) -> Result<(DirstateParents, DirstateVec, CopyVec), DirstateParseError> {
+) -> Result<DirstateParents, DirstateParseError> {
     if contents.len() < PARENT_SIZE * 2 {
         return Err(DirstateParseError::TooLittleData);
     }
 
-    let mut dirstate_vec = vec![];
-    let mut copies = vec![];
     let mut curr_pos = PARENT_SIZE * 2;
     let parents = DirstateParents {
-        p1: &contents[..PARENT_SIZE],
-        p2: &contents[PARENT_SIZE..curr_pos],
+        p1: copy_into_array(&contents[..PARENT_SIZE]),
+        p2: copy_into_array(&contents[PARENT_SIZE..curr_pos]),
     };
 
     while curr_pos < contents.len() {
@@ -57,9 +61,9 @@
         };
 
         if let Some(copy_path) = copy {
-            copies.push(CopyVecEntry { path, copy_path });
+            copy_map.insert(path.to_owned(), copy_path.to_owned());
         };
-        dirstate_vec.push((
+        state_map.insert(
             path.to_owned(),
             DirstateEntry {
                 state,
@@ -67,28 +71,28 @@
                 size,
                 mtime,
             },
-        ));
+        );
         curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len);
     }
 
-    Ok((parents, dirstate_vec, copies))
+    Ok(parents)
 }
 
+/// `now` is the duration in seconds since the Unix epoch
 pub fn pack_dirstate(
-    dirstate_vec: &DirstateVec,
-    copymap: &HashMap<Vec<u8>, Vec<u8>>,
+    state_map: &mut StateMap,
+    copy_map: &CopyMap,
     parents: DirstateParents,
-    now: i32,
-) -> Result<(Vec<u8>, DirstateVec), DirstatePackError> {
-    if parents.p1.len() != PARENT_SIZE || parents.p2.len() != PARENT_SIZE {
-        return Err(DirstatePackError::CorruptedParent);
-    }
+    now: Duration,
+) -> Result<Vec<u8>, DirstatePackError> {
+    // TODO move away from i32 before 2038.
+    let now: i32 = now.as_secs().try_into().expect("time overflow");
 
-    let expected_size: usize = dirstate_vec
+    let expected_size: usize = state_map
         .iter()
-        .map(|(ref filename, _)| {
+        .map(|(filename, _)| {
             let mut length = MIN_ENTRY_SIZE + filename.len();
-            if let Some(ref copy) = copymap.get(filename) {
+            if let Some(ref copy) = copy_map.get(filename) {
                 length += copy.len() + 1;
             }
             length
@@ -97,15 +101,15 @@
     let expected_size = expected_size + PARENT_SIZE * 2;
 
     let mut packed = Vec::with_capacity(expected_size);
-    let mut new_dirstate_vec = vec![];
+    let mut new_state_map = vec![];
 
-    packed.extend(parents.p1);
-    packed.extend(parents.p2);
+    packed.extend(&parents.p1);
+    packed.extend(&parents.p2);
 
-    for (ref filename, entry) in dirstate_vec {
-        let mut new_filename: Vec<u8> = filename.to_owned();
+    for (ref filename, entry) in state_map.iter() {
+        let mut new_filename: Vec<u8> = filename.to_vec();
         let mut new_mtime: i32 = entry.mtime;
-        if entry.state == 'n' as i8 && entry.mtime == now.into() {
+        if entry.state == 'n' as i8 && entry.mtime == now {
             // The file was last modified "simultaneously" with the current
             // write to dirstate (i.e. within the same second for file-
             // systems with a granularity of 1 sec). This commonly happens
@@ -116,8 +120,8 @@
             // contents of the file if the size is the same. This prevents
             // mistakenly treating such files as clean.
             new_mtime = -1;
-            new_dirstate_vec.push((
-                filename.to_owned(),
+            new_state_map.push((
+                filename.to_owned().to_vec(),
                 DirstateEntry {
                     mtime: new_mtime,
                     ..*entry
@@ -125,7 +129,7 @@
             ));
         }
 
-        if let Some(copy) = copymap.get(filename) {
+        if let Some(copy) = copy_map.get(*filename) {
             new_filename.push('\0' as u8);
             new_filename.extend(copy);
         }
@@ -142,66 +146,37 @@
         return Err(DirstatePackError::BadSize(expected_size, packed.len()));
     }
 
-    Ok((packed, new_dirstate_vec))
+    state_map.extend(new_state_map);
+
+    Ok(packed)
 }
 
 #[cfg(test)]
 mod tests {
     use super::*;
+    use std::collections::HashMap;
 
     #[test]
     fn test_pack_dirstate_empty() {
-        let dirstate_vec: DirstateVec = vec![];
+        let mut state_map: StateMap = HashMap::new();
         let copymap = HashMap::new();
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: *b"12345678910111213141",
+            p2: *b"00000000000000000000",
         };
-        let now: i32 = 15000000;
-        let expected =
-            (b"1234567891011121314100000000000000000000".to_vec(), vec![]);
+        let now = Duration::new(15000000, 0);
+        let expected = b"1234567891011121314100000000000000000000".to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
+
+        assert!(state_map.is_empty())
     }
     #[test]
     fn test_pack_dirstate_one_entry() {
-        let dirstate_vec: DirstateVec = vec![(
-            vec!['f' as u8, '1' as u8],
-            DirstateEntry {
-                state: 'n' as i8,
-                mode: 0o644,
-                size: 0,
-                mtime: 791231220,
-            },
-        )];
-        let copymap = HashMap::new();
-        let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
-        };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 2, 102, 49,
-            ]
-            .to_vec(),
-            vec![],
-        );
-
-        assert_eq!(
-            expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
-        );
-    }
-    #[test]
-    fn test_pack_dirstate_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+        let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
                 state: 'n' as i8,
@@ -209,35 +184,36 @@
                 size: 0,
                 mtime: 791231220,
             },
-        )];
-        let mut copymap = HashMap::new();
-        copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
+
+        let copymap = HashMap::new();
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: *b"12345678910111213141",
+            p2: *b"00000000000000000000",
         };
-        let now: i32 = 15000000;
-        let expected = (
-            [
-                49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50,
-                49, 51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
-                48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0,
-                0, 0, 0, 47, 41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111,
-                112, 121, 110, 97, 109, 101,
-            ]
-            .to_vec(),
-            vec![],
-        );
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 2, 102, 49,
+        ]
+        .to_vec();
 
         assert_eq!(
             expected,
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap()
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
         );
-    }
 
+        assert_eq!(expected_state_map, state_map);
+    }
     #[test]
-    fn test_parse_pack_one_entry_with_copy() {
-        let dirstate_vec: DirstateVec = vec![(
+    fn test_pack_dirstate_one_entry_with_copy() {
+        let expected_state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
                 state: 'n' as i8,
@@ -245,36 +221,76 @@
                 size: 0,
                 mtime: 791231220,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut state_map = expected_state_map.clone();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: *b"12345678910111213141",
+            p2: *b"00000000000000000000",
         };
-        let now: i32 = 15000000;
-        let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+        let now = Duration::new(15000000, 0);
+        let expected = [
+            49, 50, 51, 52, 53, 54, 55, 56, 57, 49, 48, 49, 49, 49, 50, 49,
+            51, 49, 52, 49, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+            48, 48, 48, 48, 48, 48, 48, 48, 110, 0, 0, 1, 164, 0, 0, 0, 0, 47,
+            41, 58, 244, 0, 0, 0, 11, 102, 49, 0, 99, 111, 112, 121, 110, 97,
+            109, 101,
+        ]
+        .to_vec();
 
         assert_eq!(
-            (
-                parents,
-                dirstate_vec,
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
-            ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            expected,
+            pack_dirstate(&mut state_map, &copymap, parents, now).unwrap()
+        );
+        assert_eq!(expected_state_map, state_map);
+    }
+
+    #[test]
+    fn test_parse_pack_one_entry_with_copy() {
+        let mut state_map: StateMap = [(
+            b"f1".to_vec(),
+            DirstateEntry {
+                state: 'n' as i8,
+                mode: 0o644,
+                size: 0,
+                mtime: 791231220,
+            },
+        )]
+        .iter()
+        .cloned()
+        .collect();
+        let mut copymap = HashMap::new();
+        copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
+        let parents = DirstateParents {
+            p1: *b"12345678910111213141",
+            p2: *b"00000000000000000000",
+        };
+        let now = Duration::new(15000000, 0);
+        let result =
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
+
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
+        assert_eq!(
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     fn test_parse_pack_multiple_entries_with_copy() {
-        let dirstate_vec: DirstateVec = vec![
+        let mut state_map: StateMap = [
             (
                 b"f1".to_vec(),
                 DirstateEntry {
@@ -311,39 +327,40 @@
                     mtime: -1,
                 },
             ),
-        ];
+        ]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         copymap.insert(b"f4\xF6".to_vec(), b"copyname2".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: *b"12345678910111213141",
+            p2: *b"00000000000000000000",
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
 
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
         assert_eq!(
-            (parents, dirstate_vec, copymap),
-            parse_dirstate(result.0.as_slice())
-                .and_then(|(p, dvec, cvec)| Ok((
-                    p,
-                    dvec,
-                    cvec.iter()
-                        .map(|entry| (
-                            entry.path.to_vec(),
-                            entry.copy_path.to_vec()
-                        ))
-                        .collect()
-                )))
-                .unwrap()
+            (parents, state_map, copymap),
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 
     #[test]
     /// https://www.mercurial-scm.org/repo/hg/rev/af3f26b6bba4
     fn test_parse_pack_one_entry_with_copy_and_time_conflict() {
-        let dirstate_vec: DirstateVec = vec![(
+        let mut state_map: StateMap = [(
             b"f1".to_vec(),
             DirstateEntry {
                 state: 'n' as i8,
@@ -351,21 +368,34 @@
                 size: 0,
                 mtime: 15000000,
             },
-        )];
+        )]
+        .iter()
+        .cloned()
+        .collect();
         let mut copymap = HashMap::new();
         copymap.insert(b"f1".to_vec(), b"copyname".to_vec());
         let parents = DirstateParents {
-            p1: b"12345678910111213141",
-            p2: b"00000000000000000000",
+            p1: *b"12345678910111213141",
+            p2: *b"00000000000000000000",
         };
-        let now: i32 = 15000000;
+        let now = Duration::new(15000000, 0);
         let result =
-            pack_dirstate(&dirstate_vec, &copymap, parents, now).unwrap();
+            pack_dirstate(&mut state_map, &copymap, parents.clone(), now)
+                .unwrap();
+
+        let mut new_state_map: StateMap = HashMap::new();
+        let mut new_copy_map: CopyMap = HashMap::new();
+        let new_parents = parse_dirstate(
+            &mut new_state_map,
+            &mut new_copy_map,
+            result.as_slice(),
+        )
+        .unwrap();
 
         assert_eq!(
             (
                 parents,
-                vec![(
+                [(
                     b"f1".to_vec(),
                     DirstateEntry {
                         state: 'n' as i8,
@@ -373,16 +403,13 @@
                         size: 0,
                         mtime: -1
                     }
-                )],
-                copymap
-                    .iter()
-                    .map(|(k, v)| CopyVecEntry {
-                        path: k.as_slice(),
-                        copy_path: v.as_slice()
-                    })
-                    .collect()
+                )]
+                .iter()
+                .cloned()
+                .collect::<StateMap>(),
+                copymap,
             ),
-            parse_dirstate(result.0.as_slice()).unwrap()
+            (new_parents, new_state_map, new_copy_map)
         )
     }
 }
--- a/rust/hg-core/src/lib.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-core/src/lib.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -10,9 +10,8 @@
 pub mod testing; // unconditionally built, for use from integration tests
 pub use dirstate::{
     dirs_multiset::DirsMultiset,
-    parsers::{pack_dirstate, parse_dirstate},
-    CopyVec, CopyVecEntry, DirsIterable, DirstateEntry, DirstateParents,
-    DirstateVec,
+    parsers::{pack_dirstate, parse_dirstate, PARENT_SIZE},
+    CopyMap, DirsIterable, DirstateEntry, DirstateParents, StateMap,
 };
 mod filepatterns;
 pub mod utils;
@@ -60,6 +59,7 @@
     TooLittleData,
     Overflow,
     CorruptedEntry(String),
+    Damaged,
 }
 
 #[derive(Debug, PartialEq)]
--- a/rust/hg-core/src/utils.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-core/src/utils.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -1,5 +1,22 @@
 pub mod files;
 
+use std::convert::AsMut;
+
+/// Takes a slice and copies it into an array.
+///
+/// # Panics
+///
+/// Will panic if the slice and target array don't have the same length.
+pub fn copy_into_array<A, T>(slice: &[T]) -> A
+where
+    A: Sized + Default + AsMut<[T]>,
+    T: Copy,
+{
+    let mut a = Default::default();
+    <A as AsMut<[T]>>::as_mut(&mut a).copy_from_slice(slice);
+    a
+}
+
 /// Replaces the `from` slice with the `to` slice inside the `buf` slice.
 ///
 /// # Examples
--- a/rust/hg-cpython/src/dirstate.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-cpython/src/dirstate.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -14,7 +14,7 @@
 use cpython::{
     PyBytes, PyDict, PyErr, PyModule, PyObject, PyResult, PySequence, Python,
 };
-use hg::{DirstateEntry, DirstateVec};
+use hg::{DirstateEntry, StateMap};
 use libc::{c_char, c_int};
 #[cfg(feature = "python27")]
 use python27_sys::PyCapsule_Import;
@@ -54,10 +54,7 @@
     }
 }
 
-pub fn extract_dirstate_vec(
-    py: Python,
-    dmap: &PyDict,
-) -> Result<DirstateVec, PyErr> {
+pub fn extract_dirstate(py: Python, dmap: &PyDict) -> Result<StateMap, PyErr> {
     dmap.items(py)
         .iter()
         .map(|(filename, stats)| {
--- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -15,7 +15,7 @@
     ToPyObject,
 };
 
-use crate::dirstate::extract_dirstate_vec;
+use crate::dirstate::extract_dirstate;
 use hg::{DirsIterable, DirsMultiset, DirstateMapError};
 
 py_class!(pub class Dirs |py| {
@@ -32,12 +32,10 @@
         if let Some(skip) = skip {
             skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8);
         }
-        let dirs_map;
-
-        if let Ok(map) = map.cast_as::<PyDict>(py) {
-            let dirstate_vec = extract_dirstate_vec(py, &map)?;
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Dirstate(dirstate_vec),
+        let inner = if let Ok(map) = map.cast_as::<PyDict>(py) {
+            let dirstate = extract_dirstate(py, &map)?;
+            DirsMultiset::new(
+                DirsIterable::Dirstate(&dirstate),
                 skip_state,
             )
         } else {
@@ -45,13 +43,13 @@
                 .iter(py)?
                 .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned()))
                 .collect();
-            dirs_map = DirsMultiset::new(
-                DirsIterable::Manifest(map?),
+            DirsMultiset::new(
+                DirsIterable::Manifest(&map?),
                 skip_state,
             )
-        }
+        };
 
-        Self::create_instance(py, RefCell::new(dirs_map))
+        Self::create_instance(py, RefCell::new(inner))
     }
 
     def addpath(&self, path: PyObject) -> PyResult<PyObject> {
--- a/rust/hg-cpython/src/parsers.rs	Wed Jul 10 10:16:28 2019 +0200
+++ b/rust/hg-cpython/src/parsers.rs	Tue Jul 09 11:49:49 2019 +0200
@@ -12,17 +12,18 @@
 //!
 use cpython::{
     exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyResult, PyTuple, Python,
-    PythonObject, ToPyObject,
+    ToPyObject,
 };
 use hg::{
-    pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry,
-    DirstatePackError, DirstateParents, DirstateParseError,
+    pack_dirstate, parse_dirstate, utils::copy_into_array, DirstateEntry,
+    DirstatePackError, DirstateParents, DirstateParseError, PARENT_SIZE,
 };
 use std::collections::HashMap;
 
 use libc::c_char;
 
-use crate::dirstate::{decapsule_make_dirstate_tuple, extract_dirstate_vec};
+use crate::dirstate::{decapsule_make_dirstate_tuple, extract_dirstate};
+use std::time::Duration;
 
 fn parse_dirstate_wrapper(
     py: Python,
@@ -30,12 +31,15 @@
     copymap: PyDict,
     st: PyBytes,
 ) -> PyResult<PyTuple> {
-    match parse_dirstate(st.data(py)) {
-        Ok((parents, dirstate_vec, copies)) => {
-            for (filename, entry) in dirstate_vec {
+    let mut dirstate_map = HashMap::new();
+    let mut copies = HashMap::new();
+
+    match parse_dirstate(&mut dirstate_map, &mut copies, st.data(py)) {
+        Ok(parents) => {
+            for (filename, entry) in dirstate_map {
                 dmap.set_item(
                     py,
-                    PyBytes::new(py, &filename[..]),
+                    PyBytes::new(py, &filename),
                     decapsule_make_dirstate_tuple(py)?(
                         entry.state as c_char,
                         entry.mode,
@@ -44,15 +48,17 @@
                     ),
                 )?;
             }
-            for CopyVecEntry { path, copy_path } in copies {
+            for (path, copy_path) in copies {
                 copymap.set_item(
                     py,
-                    PyBytes::new(py, path),
-                    PyBytes::new(py, copy_path),
+                    PyBytes::new(py, &path),
+                    PyBytes::new(py, &copy_path),
                 )?;
             }
-            Ok((PyBytes::new(py, parents.p1), PyBytes::new(py, parents.p2))
-                .to_py_object(py))
+            Ok(
+                (PyBytes::new(py, &parents.p1), PyBytes::new(py, &parents.p2))
+                    .to_py_object(py),
+            )
         }
         Err(e) => Err(PyErr::new::<exc::ValueError, _>(
             py,
@@ -64,6 +70,9 @@
                     "overflow in dirstate".to_string()
                 }
                 DirstateParseError::CorruptedEntry(e) => e,
+                DirstateParseError::Damaged => {
+                    "dirstate appears to be damaged".to_string()
+                }
             },
         )),
     }
@@ -81,7 +90,7 @@
     let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?;
     let p2: &[u8] = p2.data(py);
 
-    let dirstate_vec = extract_dirstate_vec(py, &dmap)?;
+    let mut dirstate_map = extract_dirstate(py, &dmap)?;
 
     let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap
         .items(py)
@@ -94,13 +103,23 @@
         })
         .collect();
 
+    if p1.len() != PARENT_SIZE || p2.len() != PARENT_SIZE {
+        return Err(PyErr::new::<exc::ValueError, _>(
+            py,
+            "expected a 20-byte hash".to_string(),
+        ));
+    }
+
     match pack_dirstate(
-        &dirstate_vec,
+        &mut dirstate_map,
         &copies?,
-        DirstateParents { p1, p2 },
-        now.as_object().extract::<i32>(py)?,
+        DirstateParents {
+            p1: copy_into_array(&p1),
+            p2: copy_into_array(&p2),
+        },
+        Duration::from_secs(now.value(py) as u64),
     ) {
-        Ok((packed, new_dirstate_vec)) => {
+        Ok(packed) => {
             for (
                 filename,
                 DirstateEntry {
@@ -109,7 +128,7 @@
                     size,
                     mtime,
                 },
-            ) in new_dirstate_vec
+            ) in dirstate_map
             {
                 dmap.set_item(
                     py,