Mercurial > hg
changeset 47102:d6c94ca40863
dirstate-tree: Serialize to disk
The existing `pack_dirstate` function relies on implementation details
of `DirstateMap`, so extract some parts of it as separate functions
for use in the tree-based `DirstateMap`.
The `bytes-cast` crate is updated to a version that has an `as_bytes` method,
not just `from_bytes`:
https://docs.rs/bytes-cast/0.2.0/bytes_cast/trait.BytesCast.html#method.as_bytes
Drive-by refactor of `clear_ambiguous_times`, which does part of the same thing.
Differential Revision: https://phab.mercurial-scm.org/D10486
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Mon, 12 Apr 2021 14:21:47 +0200 |
parents | 5d62243c7732 |
children | 214ae40e136b |
files | rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/dirstate/parsers.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs |
diffstat | 4 files changed, 105 insertions(+), 48 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/Cargo.lock Mon Apr 12 14:43:45 2021 +0200 +++ b/rust/Cargo.lock Mon Apr 12 14:21:47 2021 +0200 @@ -64,9 +64,9 @@ [[package]] name = "bytes-cast" -version = "0.1.0" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3196ba300c7bc9282a4331e878496cb3e9603a898a8f1446601317163e16ca52" +checksum = "0d434f9a4ecbe987e7ccfda7274b6f82ea52c9b63742565a65cb5e8ba0f2c452" dependencies = [ "bytes-cast-derive", ]
--- a/rust/hg-core/Cargo.toml Mon Apr 12 14:43:45 2021 +0200 +++ b/rust/hg-core/Cargo.toml Mon Apr 12 14:21:47 2021 +0200 @@ -9,7 +9,7 @@ name = "hg" [dependencies] -bytes-cast = "0.1" +bytes-cast = "0.2" byteorder = "1.3.4" derive_more = "0.99" home = "0.5"
--- a/rust/hg-core/src/dirstate/parsers.rs Mon Apr 12 14:43:45 2021 +0200 +++ b/rust/hg-core/src/dirstate/parsers.rs Mon Apr 12 14:21:47 2021 +0200 @@ -4,7 +4,7 @@ // GNU General Public License version 2 or any later version. use crate::errors::HgError; -use crate::utils::hg_path::HgPath; +use crate::utils::hg_path::{HgPath, HgPathBuf}; use crate::{ dirstate::{CopyMap, EntryState, RawEntry, StateMap}, DirstateEntry, DirstateParents, @@ -82,9 +82,71 @@ Ok(parents) } +fn packed_filename_and_copy_source_size( + filename: &HgPathBuf, + copy_source: Option<&HgPathBuf>, +) -> usize { + filename.len() + + if let Some(source) = copy_source { + b"\0".len() + source.len() + } else { + 0 + } +} + +pub fn packed_entry_size( + filename: &HgPathBuf, + copy_source: Option<&HgPathBuf>, +) -> usize { + MIN_ENTRY_SIZE + + packed_filename_and_copy_source_size(filename, copy_source) +} + +pub fn pack_entry( + filename: &HgPathBuf, + entry: &DirstateEntry, + copy_source: Option<&HgPathBuf>, + packed: &mut Vec<u8>, +) { + let length = packed_filename_and_copy_source_size(filename, copy_source); + + // Unwrapping because `impl std::io::Write for Vec<u8>` never errors + packed.write_u8(entry.state.into()).unwrap(); + packed.write_i32::<BigEndian>(entry.mode).unwrap(); + packed.write_i32::<BigEndian>(entry.size).unwrap(); + packed.write_i32::<BigEndian>(entry.mtime).unwrap(); + packed.write_i32::<BigEndian>(length as i32).unwrap(); + packed.extend(filename.as_bytes()); + if let Some(source) = copy_source { + packed.push(b'\0'); + packed.extend(source.as_bytes()); + } +} + /// Seconds since the Unix epoch pub struct Timestamp(pub u64); +pub fn clear_ambiguous_mtime( + entry: &mut DirstateEntry, + mtime_now: i32, +) -> bool { + let ambiguous = + entry.state == EntryState::Normal && entry.mtime == mtime_now; + if ambiguous { + // The file was last modified "simultaneously" with the current + // write to dirstate (i.e. within the same second for file- + // systems with a granularity of 1 sec). 
This commonly happens + // for at least a couple of files on 'update'. + // The user could change the file without changing its size + // within the same second. Invalidate the file's mtime in + // dirstate, forcing future 'status' calls to compare the + // contents of the file if the size is the same. This prevents + // mistakenly treating such files as clean. + entry.mtime = -1; + } + ambiguous +} + pub fn pack_dirstate( state_map: &mut StateMap, copy_map: &CopyMap, @@ -97,11 +159,7 @@ let expected_size: usize = state_map .iter() .map(|(filename, _)| { - let mut length = MIN_ENTRY_SIZE + filename.len(); - if let Some(copy) = copy_map.get(filename) { - length += copy.len() + 1; - } - length + packed_entry_size(filename, copy_map.get(filename)) }) .sum(); let expected_size = expected_size + PARENT_SIZE * 2; @@ -112,39 +170,8 @@ packed.extend(parents.p2.as_bytes()); for (filename, entry) in state_map.iter_mut() { - let new_filename = filename.to_owned(); - let mut new_mtime: i32 = entry.mtime; - if entry.state == EntryState::Normal && entry.mtime == now { - // The file was last modified "simultaneously" with the current - // write to dirstate (i.e. within the same second for file- - // systems with a granularity of 1 sec). This commonly happens - // for at least a couple of files on 'update'. - // The user could change the file without changing its size - // within the same second. Invalidate the file's mtime in - // dirstate, forcing future 'status' calls to compare the - // contents of the file if the size is the same. This prevents - // mistakenly treating such files as clean. 
- new_mtime = -1; - *entry = DirstateEntry { - mtime: new_mtime, - ..*entry - }; - } - let mut new_filename = new_filename.into_vec(); - if let Some(copy) = copy_map.get(filename) { - new_filename.push(b'\0'); - new_filename.extend(copy.bytes()); - } - - // Unwrapping because `impl std::io::Write for Vec<u8>` never errors - packed.write_u8(entry.state.into()).unwrap(); - packed.write_i32::<BigEndian>(entry.mode).unwrap(); - packed.write_i32::<BigEndian>(entry.size).unwrap(); - packed.write_i32::<BigEndian>(new_mtime).unwrap(); - packed - .write_i32::<BigEndian>(new_filename.len() as i32) - .unwrap(); - packed.extend(new_filename) + clear_ambiguous_mtime(entry, now); + pack_entry(filename, entry, copy_map.get(filename), &mut packed) } if packed.len() != expected_size {
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Mon Apr 12 14:43:45 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Mon Apr 12 14:21:47 2021 +0200 @@ -1,11 +1,14 @@ -use std::collections::BTreeMap; +use bytes_cast::BytesCast; use std::path::PathBuf; +use std::{collections::BTreeMap, convert::TryInto}; use super::path_with_basename::WithBasename; +use crate::dirstate::parsers::clear_ambiguous_mtime; +use crate::dirstate::parsers::pack_entry; +use crate::dirstate::parsers::packed_entry_size; use crate::dirstate::parsers::parse_dirstate_entries; use crate::dirstate::parsers::parse_dirstate_parents; use crate::dirstate::parsers::Timestamp; - use crate::matchers::Matcher; use crate::revlog::node::NULL_NODE; use crate::utils::hg_path::{HgPath, HgPathBuf}; @@ -327,11 +330,38 @@ fn pack( &mut self, - _parents: DirstateParents, - _now: Timestamp, + parents: DirstateParents, + now: Timestamp, ) -> Result<Vec<u8>, DirstateError> { - let _ = self.iter_node_data_mut(); - todo!() + // Optizimation (to be measured?): pre-compute size to avoid `Vec` + // reallocations + let mut size = parents.as_bytes().len(); + for (path, node) in self.iter_nodes() { + if node.entry.is_some() { + size += packed_entry_size( + path.full_path(), + node.copy_source.as_ref(), + ) + } + } + + let mut packed = Vec::with_capacity(size); + packed.extend(parents.as_bytes()); + + let now: i32 = now.0.try_into().expect("time overflow"); + for (path, opt_entry, copy_source) in self.iter_node_data_mut() { + if let Some(entry) = opt_entry { + clear_ambiguous_mtime(entry, now); + pack_entry( + path.full_path(), + entry, + copy_source.as_ref(), + &mut packed, + ); + } + } + self.dirty_parents = false; + Ok(packed) } fn build_file_fold_map(&mut self) -> &FastHashMap<HgPathBuf, HgPathBuf> {