dirstate-tree: Serialize to disk
The existing `pack_dirstate` function relies on implementation details
of `DirstateMap`, so extract some parts of it as separate functions
for use in the tree-based `DirstateMap`.
The `bytes-cast` crate is updated to a version that has an `as_bytes` method,
not just `from_bytes`:
https://docs.rs/bytes-cast/0.2.0/bytes_cast/trait.BytesCast.html#method.as_bytes
Drive-by refactor `clear_ambiguous_times` which does part of the same thing.
Differential Revision: https://phab.mercurial-scm.org/D10486
--- a/rust/Cargo.lock Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/Cargo.lock Mon Apr 12 14:21:47 2021 +0200
@@ -64,9 +64,9 @@
[[package]]
name = "bytes-cast"
-version = "0.1.0"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3196ba300c7bc9282a4331e878496cb3e9603a898a8f1446601317163e16ca52"
+checksum = "0d434f9a4ecbe987e7ccfda7274b6f82ea52c9b63742565a65cb5e8ba0f2c452"
dependencies = [
"bytes-cast-derive",
]
--- a/rust/hg-core/Cargo.toml Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/hg-core/Cargo.toml Mon Apr 12 14:21:47 2021 +0200
@@ -9,7 +9,7 @@
name = "hg"
[dependencies]
-bytes-cast = "0.1"
+bytes-cast = "0.2"
byteorder = "1.3.4"
derive_more = "0.99"
home = "0.5"
--- a/rust/hg-core/src/dirstate/parsers.rs Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/hg-core/src/dirstate/parsers.rs Mon Apr 12 14:21:47 2021 +0200
@@ -4,7 +4,7 @@
// GNU General Public License version 2 or any later version.
use crate::errors::HgError;
-use crate::utils::hg_path::HgPath;
+use crate::utils::hg_path::{HgPath, HgPathBuf};
use crate::{
dirstate::{CopyMap, EntryState, RawEntry, StateMap},
DirstateEntry, DirstateParents,
@@ -82,9 +82,71 @@
Ok(parents)
}
+fn packed_filename_and_copy_source_size(
+ filename: &HgPathBuf,
+ copy_source: Option<&HgPathBuf>,
+) -> usize {
+ filename.len()
+ + if let Some(source) = copy_source {
+ b"\0".len() + source.len()
+ } else {
+ 0
+ }
+}
+
+pub fn packed_entry_size(
+ filename: &HgPathBuf,
+ copy_source: Option<&HgPathBuf>,
+) -> usize {
+ MIN_ENTRY_SIZE
+ + packed_filename_and_copy_source_size(filename, copy_source)
+}
+
+pub fn pack_entry(
+ filename: &HgPathBuf,
+ entry: &DirstateEntry,
+ copy_source: Option<&HgPathBuf>,
+ packed: &mut Vec<u8>,
+) {
+ let length = packed_filename_and_copy_source_size(filename, copy_source);
+
+ // Unwrapping because `impl std::io::Write for Vec<u8>` never errors
+ packed.write_u8(entry.state.into()).unwrap();
+ packed.write_i32::<BigEndian>(entry.mode).unwrap();
+ packed.write_i32::<BigEndian>(entry.size).unwrap();
+ packed.write_i32::<BigEndian>(entry.mtime).unwrap();
+ packed.write_i32::<BigEndian>(length as i32).unwrap();
+ packed.extend(filename.as_bytes());
+ if let Some(source) = copy_source {
+ packed.push(b'\0');
+ packed.extend(source.as_bytes());
+ }
+}
+
/// Seconds since the Unix epoch
pub struct Timestamp(pub u64);
+pub fn clear_ambiguous_mtime(
+ entry: &mut DirstateEntry,
+ mtime_now: i32,
+) -> bool {
+ let ambiguous =
+ entry.state == EntryState::Normal && entry.mtime == mtime_now;
+ if ambiguous {
+ // The file was last modified "simultaneously" with the current
+ // write to dirstate (i.e. within the same second for file-
+ // systems with a granularity of 1 sec). This commonly happens
+ // for at least a couple of files on 'update'.
+ // The user could change the file without changing its size
+ // within the same second. Invalidate the file's mtime in
+ // dirstate, forcing future 'status' calls to compare the
+ // contents of the file if the size is the same. This prevents
+ // mistakenly treating such files as clean.
+ entry.mtime = -1;
+ }
+ ambiguous
+}
+
pub fn pack_dirstate(
state_map: &mut StateMap,
copy_map: &CopyMap,
@@ -97,11 +159,7 @@
let expected_size: usize = state_map
.iter()
.map(|(filename, _)| {
- let mut length = MIN_ENTRY_SIZE + filename.len();
- if let Some(copy) = copy_map.get(filename) {
- length += copy.len() + 1;
- }
- length
+ packed_entry_size(filename, copy_map.get(filename))
})
.sum();
let expected_size = expected_size + PARENT_SIZE * 2;
@@ -112,39 +170,8 @@
packed.extend(parents.p2.as_bytes());
for (filename, entry) in state_map.iter_mut() {
- let new_filename = filename.to_owned();
- let mut new_mtime: i32 = entry.mtime;
- if entry.state == EntryState::Normal && entry.mtime == now {
- // The file was last modified "simultaneously" with the current
- // write to dirstate (i.e. within the same second for file-
- // systems with a granularity of 1 sec). This commonly happens
- // for at least a couple of files on 'update'.
- // The user could change the file without changing its size
- // within the same second. Invalidate the file's mtime in
- // dirstate, forcing future 'status' calls to compare the
- // contents of the file if the size is the same. This prevents
- // mistakenly treating such files as clean.
- new_mtime = -1;
- *entry = DirstateEntry {
- mtime: new_mtime,
- ..*entry
- };
- }
- let mut new_filename = new_filename.into_vec();
- if let Some(copy) = copy_map.get(filename) {
- new_filename.push(b'\0');
- new_filename.extend(copy.bytes());
- }
-
- // Unwrapping because `impl std::io::Write for Vec<u8>` never errors
- packed.write_u8(entry.state.into()).unwrap();
- packed.write_i32::<BigEndian>(entry.mode).unwrap();
- packed.write_i32::<BigEndian>(entry.size).unwrap();
- packed.write_i32::<BigEndian>(new_mtime).unwrap();
- packed
- .write_i32::<BigEndian>(new_filename.len() as i32)
- .unwrap();
- packed.extend(new_filename)
+ clear_ambiguous_mtime(entry, now);
+ pack_entry(filename, entry, copy_map.get(filename), &mut packed)
}
if packed.len() != expected_size {
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Mon Apr 12 14:43:45 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Mon Apr 12 14:21:47 2021 +0200
@@ -1,11 +1,14 @@
-use std::collections::BTreeMap;
+use bytes_cast::BytesCast;
use std::path::PathBuf;
+use std::{collections::BTreeMap, convert::TryInto};
use super::path_with_basename::WithBasename;
+use crate::dirstate::parsers::clear_ambiguous_mtime;
+use crate::dirstate::parsers::pack_entry;
+use crate::dirstate::parsers::packed_entry_size;
use crate::dirstate::parsers::parse_dirstate_entries;
use crate::dirstate::parsers::parse_dirstate_parents;
use crate::dirstate::parsers::Timestamp;
-
use crate::matchers::Matcher;
use crate::revlog::node::NULL_NODE;
use crate::utils::hg_path::{HgPath, HgPathBuf};
@@ -327,11 +330,38 @@
fn pack(
&mut self,
- _parents: DirstateParents,
- _now: Timestamp,
+ parents: DirstateParents,
+ now: Timestamp,
) -> Result<Vec<u8>, DirstateError> {
- let _ = self.iter_node_data_mut();
- todo!()
+ // Optimization (to be measured?): pre-compute size to avoid `Vec`
+ // reallocations
+ let mut size = parents.as_bytes().len();
+ for (path, node) in self.iter_nodes() {
+ if node.entry.is_some() {
+ size += packed_entry_size(
+ path.full_path(),
+ node.copy_source.as_ref(),
+ )
+ }
+ }
+
+ let mut packed = Vec::with_capacity(size);
+ packed.extend(parents.as_bytes());
+
+ let now: i32 = now.0.try_into().expect("time overflow");
+ for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
+ if let Some(entry) = opt_entry {
+ clear_ambiguous_mtime(entry, now);
+ pack_entry(
+ path.full_path(),
+ entry,
+ copy_source.as_ref(),
+ &mut packed,
+ );
+ }
+ }
+ self.dirty_parents = false;
+ Ok(packed)
}
fn build_file_fold_map(&mut self) -> &FastHashMap<HgPathBuf, HgPathBuf> {