dirstate-tree: Serialize to disk
author Simon Sapin <simon.sapin@octobus.net>
Mon, 12 Apr 2021 14:21:47 +0200
changeset 47116 d6c94ca40863
parent 47115 5d62243c7732
child 47117 214ae40e136b
dirstate-tree: Serialize to disk The existing `pack_dirstate` function relies on implementation details of `DirstateMap`, so extract some parts of it as separate functions for use in the tree-based `DirstateMap`. The `bytes-cast` crate is updated to a version that has an `as_bytes` method, not just `from_bytes`: https://docs.rs/bytes-cast/0.2.0/bytes_cast/trait.BytesCast.html#method.as_bytes Drive-by refactor `clear_ambiguous_times` which does part of the same thing. Differential Revision: https://phab.mercurial-scm.org/D10486
rust/Cargo.lock
rust/hg-core/Cargo.toml
rust/hg-core/src/dirstate/parsers.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
--- a/rust/Cargo.lock	Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/Cargo.lock	Mon Apr 12 14:21:47 2021 +0200
@@ -64,9 +64,9 @@
 
 [[package]]
 name = "bytes-cast"
-version = "0.1.0"
+version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3196ba300c7bc9282a4331e878496cb3e9603a898a8f1446601317163e16ca52"
+checksum = "0d434f9a4ecbe987e7ccfda7274b6f82ea52c9b63742565a65cb5e8ba0f2c452"
 dependencies = [
  "bytes-cast-derive",
 ]
--- a/rust/hg-core/Cargo.toml	Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/hg-core/Cargo.toml	Mon Apr 12 14:21:47 2021 +0200
@@ -9,7 +9,7 @@
 name = "hg"
 
 [dependencies]
-bytes-cast = "0.1"
+bytes-cast = "0.2"
 byteorder = "1.3.4"
 derive_more = "0.99"
 home = "0.5"
--- a/rust/hg-core/src/dirstate/parsers.rs	Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/hg-core/src/dirstate/parsers.rs	Mon Apr 12 14:21:47 2021 +0200
@@ -4,7 +4,7 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::errors::HgError;
-use crate::utils::hg_path::HgPath;
+use crate::utils::hg_path::{HgPath, HgPathBuf};
 use crate::{
     dirstate::{CopyMap, EntryState, RawEntry, StateMap},
     DirstateEntry, DirstateParents,
@@ -82,9 +82,71 @@
     Ok(parents)
 }
 
+fn packed_filename_and_copy_source_size(
+    filename: &HgPathBuf,
+    copy_source: Option<&HgPathBuf>,
+) -> usize {
+    filename.len()
+        + if let Some(source) = copy_source {
+            b"\0".len() + source.len()
+        } else {
+            0
+        }
+}
+
+pub fn packed_entry_size(
+    filename: &HgPathBuf,
+    copy_source: Option<&HgPathBuf>,
+) -> usize {
+    MIN_ENTRY_SIZE
+        + packed_filename_and_copy_source_size(filename, copy_source)
+}
+
+pub fn pack_entry(
+    filename: &HgPathBuf,
+    entry: &DirstateEntry,
+    copy_source: Option<&HgPathBuf>,
+    packed: &mut Vec<u8>,
+) {
+    let length = packed_filename_and_copy_source_size(filename, copy_source);
+
+    // Unwrapping because `impl std::io::Write for Vec<u8>` never errors
+    packed.write_u8(entry.state.into()).unwrap();
+    packed.write_i32::<BigEndian>(entry.mode).unwrap();
+    packed.write_i32::<BigEndian>(entry.size).unwrap();
+    packed.write_i32::<BigEndian>(entry.mtime).unwrap();
+    packed.write_i32::<BigEndian>(length as i32).unwrap();
+    packed.extend(filename.as_bytes());
+    if let Some(source) = copy_source {
+        packed.push(b'\0');
+        packed.extend(source.as_bytes());
+    }
+}
+
 /// Seconds since the Unix epoch
 pub struct Timestamp(pub u64);
 
+pub fn clear_ambiguous_mtime(
+    entry: &mut DirstateEntry,
+    mtime_now: i32,
+) -> bool {
+    let ambiguous =
+        entry.state == EntryState::Normal && entry.mtime == mtime_now;
+    if ambiguous {
+        // The file was last modified "simultaneously" with the current
+        // write to dirstate (i.e. within the same second for file-
+        // systems with a granularity of 1 sec). This commonly happens
+        // for at least a couple of files on 'update'.
+        // The user could change the file without changing its size
+        // within the same second. Invalidate the file's mtime in
+        // dirstate, forcing future 'status' calls to compare the
+        // contents of the file if the size is the same. This prevents
+        // mistakenly treating such files as clean.
+        entry.mtime = -1;
+    }
+    ambiguous
+}
+
 pub fn pack_dirstate(
     state_map: &mut StateMap,
     copy_map: &CopyMap,
@@ -97,11 +159,7 @@
     let expected_size: usize = state_map
         .iter()
         .map(|(filename, _)| {
-            let mut length = MIN_ENTRY_SIZE + filename.len();
-            if let Some(copy) = copy_map.get(filename) {
-                length += copy.len() + 1;
-            }
-            length
+            packed_entry_size(filename, copy_map.get(filename))
         })
         .sum();
     let expected_size = expected_size + PARENT_SIZE * 2;
@@ -112,39 +170,8 @@
     packed.extend(parents.p2.as_bytes());
 
     for (filename, entry) in state_map.iter_mut() {
-        let new_filename = filename.to_owned();
-        let mut new_mtime: i32 = entry.mtime;
-        if entry.state == EntryState::Normal && entry.mtime == now {
-            // The file was last modified "simultaneously" with the current
-            // write to dirstate (i.e. within the same second for file-
-            // systems with a granularity of 1 sec). This commonly happens
-            // for at least a couple of files on 'update'.
-            // The user could change the file without changing its size
-            // within the same second. Invalidate the file's mtime in
-            // dirstate, forcing future 'status' calls to compare the
-            // contents of the file if the size is the same. This prevents
-            // mistakenly treating such files as clean.
-            new_mtime = -1;
-            *entry = DirstateEntry {
-                mtime: new_mtime,
-                ..*entry
-            };
-        }
-        let mut new_filename = new_filename.into_vec();
-        if let Some(copy) = copy_map.get(filename) {
-            new_filename.push(b'\0');
-            new_filename.extend(copy.bytes());
-        }
-
-        // Unwrapping because `impl std::io::Write for Vec<u8>` never errors
-        packed.write_u8(entry.state.into()).unwrap();
-        packed.write_i32::<BigEndian>(entry.mode).unwrap();
-        packed.write_i32::<BigEndian>(entry.size).unwrap();
-        packed.write_i32::<BigEndian>(new_mtime).unwrap();
-        packed
-            .write_i32::<BigEndian>(new_filename.len() as i32)
-            .unwrap();
-        packed.extend(new_filename)
+        clear_ambiguous_mtime(entry, now);
+        pack_entry(filename, entry, copy_map.get(filename), &mut packed)
     }
 
     if packed.len() != expected_size {
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Apr 12 14:21:47 2021 +0200
@@ -1,11 +1,14 @@
-use std::collections::BTreeMap;
+use bytes_cast::BytesCast;
 use std::path::PathBuf;
+use std::{collections::BTreeMap, convert::TryInto};
 
 use super::path_with_basename::WithBasename;
+use crate::dirstate::parsers::clear_ambiguous_mtime;
+use crate::dirstate::parsers::pack_entry;
+use crate::dirstate::parsers::packed_entry_size;
 use crate::dirstate::parsers::parse_dirstate_entries;
 use crate::dirstate::parsers::parse_dirstate_parents;
 use crate::dirstate::parsers::Timestamp;
-
 use crate::matchers::Matcher;
 use crate::revlog::node::NULL_NODE;
 use crate::utils::hg_path::{HgPath, HgPathBuf};
@@ -327,11 +330,38 @@
 
     fn pack(
         &mut self,
-        _parents: DirstateParents,
-        _now: Timestamp,
+        parents: DirstateParents,
+        now: Timestamp,
     ) -> Result<Vec<u8>, DirstateError> {
-        let _ = self.iter_node_data_mut();
-        todo!()
+        // Optimization (to be measured?): pre-compute size to avoid `Vec`
+        // reallocations
+        let mut size = parents.as_bytes().len();
+        for (path, node) in self.iter_nodes() {
+            if node.entry.is_some() {
+                size += packed_entry_size(
+                    path.full_path(),
+                    node.copy_source.as_ref(),
+                )
+            }
+        }
+
+        let mut packed = Vec::with_capacity(size);
+        packed.extend(parents.as_bytes());
+
+        let now: i32 = now.0.try_into().expect("time overflow");
+        for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
+            if let Some(entry) = opt_entry {
+                clear_ambiguous_mtime(entry, now);
+                pack_entry(
+                    path.full_path(),
+                    entry,
+                    copy_source.as_ref(),
+                    &mut packed,
+                );
+            }
+        }
+        self.dirty_parents = false;
+        Ok(packed)
     }
 
     fn build_file_fold_map(&mut self) -> &FastHashMap<HgPathBuf, HgPathBuf> {