rhg: Add Repo::write_dirstate
This method is not used yet. It saves to disk any mutation that was done to
the `Repo`’s dirstate through `Repo::dirstate_map_mut`. It takes care of
dirstate-v1 vs. dirstate-v2, dockets, data files, appending when possible,
etc.
Differential Revision: https://phab.mercurial-scm.org/D11839
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Dec 02 15:10:03 2021 +0100
@@ -951,7 +951,7 @@
pub fn pack_v2(
&self,
can_append: bool,
- ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
+ ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
let map = self.get_map();
on_disk::write(map, can_append)
}
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Dec 02 15:10:03 2021 +0100
@@ -14,8 +14,10 @@
use bytes_cast::unaligned::{U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
+use rand::Rng;
use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
+use std::fmt::Write;
/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This a redundant sanity check more than an actual "magic number" since
@@ -68,7 +70,7 @@
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
-struct TreeMetadata {
+pub struct TreeMetadata {
root_nodes: ChildNodes,
nodes_with_entry_count: Size,
nodes_with_copy_source_count: Size,
@@ -186,7 +188,51 @@
}
}
+impl TreeMetadata {
+ pub fn as_bytes(&self) -> &[u8] { // forwards to the derived `BytesCast`
+ BytesCast::as_bytes(self)
+ }
+}
+
impl<'on_disk> Docket<'on_disk> {
+ /// Generate the identifier for a new data file: 8 random hex digits
+ ///
+ /// TODO: support the `HGTEST_UUIDFILE` environment variable.
+ /// See `mercurial/revlogutils/docket.py`
+ pub fn new_uid() -> String {
+ const ID_LENGTH: usize = 8; // 8 hex digits = 32 random bits
+ let mut id = String::with_capacity(ID_LENGTH);
+ let mut rng = rand::thread_rng();
+ for _ in 0..ID_LENGTH {
+ // One random lowercase hex digit (upper bound 16 is exclusive).
+ // `unwrap` never panics because `impl Write for String`
+ // never returns an error.
+ write!(&mut id, "{:x}", rng.gen_range(0, 16)).unwrap();
+ }
+ id
+ }
+
+ pub fn serialize( // builds the docket bytes: header, then `uuid`
+ parents: DirstateParents,
+ tree_metadata: TreeMetadata,
+ data_size: u64,
+ uuid: &[u8],
+ ) -> Result<Vec<u8>, std::num::TryFromIntError> {
+ let header = DocketHeader {
+ marker: *V2_FORMAT_MARKER,
+ parent_1: parents.p1.pad_to_256_bits(),
+ parent_2: parents.p2.pad_to_256_bits(),
+ metadata: tree_metadata,
+ data_size: u32::try_from(data_size)?.into(), // Err if > u32::MAX
+ uuid_size: uuid.len().try_into()?, // Err if too large
+ };
+ let header = header.as_bytes(); // shadow the struct with its bytes
+ let mut docket = Vec::with_capacity(header.len() + uuid.len());
+ docket.extend_from_slice(header);
+ docket.extend_from_slice(uuid);
+ Ok(docket)
+ }
+
pub fn parents(&self) -> DirstateParents {
use crate::Node;
let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
@@ -555,7 +601,7 @@
pub(super) fn write(
dirstate_map: &DirstateMap,
can_append: bool,
-) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> {
+) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
let append = can_append && dirstate_map.write_should_append();
// This ignores the space for paths, and for nodes without an entry.
@@ -581,7 +627,7 @@
unused: [0; 4],
ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
};
- Ok((writer.out, meta.as_bytes().to_vec(), append))
+ Ok((writer.out, meta, append))
}
struct Writer<'dmap, 'on_disk> {
--- a/rust/hg-core/src/repo.rs Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/repo.rs Thu Dec 02 15:10:03 2021 +0100
@@ -2,9 +2,10 @@
use crate::config::{Config, ConfigError, ConfigParseError};
use crate::dirstate::DirstateParents;
use crate::dirstate_tree::dirstate_map::DirstateMap;
+use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
use crate::dirstate_tree::owning::OwningDirstateMap;
-use crate::errors::HgError;
use crate::errors::HgResultExt;
+use crate::errors::{HgError, IoResultExt};
use crate::exit_codes;
use crate::lock::{try_with_lock_no_wait, LockError};
use crate::manifest::{Manifest, Manifestlog};
@@ -18,6 +19,9 @@
use crate::{DirstateError, Revision};
use std::cell::{Ref, RefCell, RefMut};
use std::collections::HashSet;
+use std::io::Seek;
+use std::io::SeekFrom;
+use std::io::Write as IoWrite;
use std::path::{Path, PathBuf};
/// A repository on disk
@@ -416,6 +420,70 @@
pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> {
Filelog::open(self, path)
}
+
+ /// Write to disk any updates that were made through `dirstate_map_mut`.
+ ///
+ /// The "wlock" must be held while calling this.
+ /// See for example `try_with_wlock_no_wait`.
+ ///
+ /// TODO: have a `WritableRepo` type only accessible while holding the
+ /// lock?
+ pub fn write_dirstate(&self) -> Result<(), DirstateError> {
+ let map = self.dirstate_map()?;
+ // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if
+ // it’s unset
+ let parents = self.dirstate_parents()?;
+ let packed_dirstate = if self.has_dirstate_v2() { // v2: data + docket
+ let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
+ let mut uuid = uuid.as_ref();
+ let can_append = uuid.is_some(); // needs an existing data file
+ let (data, tree_metadata, append) = map.pack_v2(can_append)?;
+ if !append { // not appending: use a fresh data file ID
+ uuid = None
+ }
+ let uuid = if let Some(uuid) = uuid {
+ std::str::from_utf8(uuid)
+ .map_err(|_| {
+ HgError::corrupted("non-UTF-8 dirstate data file ID")
+ })?
+ .to_owned()
+ } else {
+ DirstateDocket::new_uid()
+ };
+ let data_filename = format!("dirstate.{}", uuid);
+ let data_filename = self.hg_vfs().join(data_filename);
+ let mut options = std::fs::OpenOptions::new();
+ if append {
+ options.append(true); // add to the existing data file
+ } else {
+ options.write(true).create_new(true); // fail if it exists
+ }
+ let data_size = (|| { // closure scopes `?` to the I/O errors
+ // TODO: loop and try another random ID if !append and this
+ // returns `ErrorKind::AlreadyExists`? Collision chance of two
+ // random IDs is one in 2**32
+ let mut file = options.open(&data_filename)?;
+ file.write_all(&data)?;
+ file.flush()?;
+ // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
+ file.seek(SeekFrom::Current(0)) // position after the write
+ })()
+ .when_writing_file(&data_filename)?;
+ DirstateDocket::serialize(
+ parents,
+ tree_metadata,
+ data_size,
+ uuid.as_bytes(),
+ )
+ .map_err(|_: std::num::TryFromIntError| {
+ HgError::corrupted("overflow in dirstate docket serialization")
+ })?
+ } else {
+ map.pack_v1(parents)? // dirstate-v1: everything goes in one file
+ };
+ self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?;
+ Ok(())
+ }
}
/// Lazily-initialized component of `Repo` with interior mutability
--- a/rust/hg-core/src/revlog/node.rs Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-core/src/revlog/node.rs Thu Dec 02 15:10:03 2021 +0100
@@ -174,6 +174,12 @@
data: self.data,
}
}
+
+ pub fn pad_to_256_bits(&self) -> [u8; 32] { // 32 bytes = 256 bits
+ let mut bits = [0; 32]; // bytes past the node data stay zero
+ bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data);
+ bits
+ }
}
/// The beginning of a binary revision SHA.
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Dec 02 12:05:36 2021 +0100
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Dec 02 15:10:03 2021 +0100
@@ -222,7 +222,7 @@
match result {
Ok((packed, tree_metadata, append)) => {
let packed = PyBytes::new(py, &packed);
- let tree_metadata = PyBytes::new(py, &tree_metadata);
+ let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
let tuple = (packed, tree_metadata, append);
Ok(tuple.to_py_object(py).into_object())
},