Mercurial > hg
changeset 48421:2097f63575a5
rhg: Add Repo::write_dirstate
This method is not used yet. It saves to disk any mutation that was done to
the `Repo`’s dirstate through `Repo::dirstate_map_mut`. It takes care of
dirstate-v1 v.s. dirstate-v2, dockets, data files, appending when possible,
etc.
Differential Revision: https://phab.mercurial-scm.org/D11839
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Thu, 02 Dec 2021 15:10:03 +0100 |
parents | c7c23bb036c9 |
children | 000130cfafb6 |
files | rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-core/src/repo.rs rust/hg-core/src/revlog/node.rs rust/hg-cpython/src/dirstate/dirstate_map.rs |
diffstat | 5 files changed, 126 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Dec 02 12:05:36 2021 +0100 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu Dec 02 15:10:03 2021 +0100 @@ -951,7 +951,7 @@ pub fn pack_v2( &self, can_append: bool, - ) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> { + ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> { let map = self.get_map(); on_disk::write(map, can_append) }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Dec 02 12:05:36 2021 +0100 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Dec 02 15:10:03 2021 +0100 @@ -14,8 +14,10 @@ use bytes_cast::unaligned::{U16Be, U32Be}; use bytes_cast::BytesCast; use format_bytes::format_bytes; +use rand::Rng; use std::borrow::Cow; use std::convert::{TryFrom, TryInto}; +use std::fmt::Write; /// Added at the start of `.hg/dirstate` when the "v2" format is used. /// This a redundant sanity check more than an actual "magic number" since @@ -68,7 +70,7 @@ /// section of `mercurial/helptext/internals/dirstate-v2.txt` #[derive(BytesCast)] #[repr(C)] -struct TreeMetadata { +pub struct TreeMetadata { root_nodes: ChildNodes, nodes_with_entry_count: Size, nodes_with_copy_source_count: Size, @@ -186,7 +188,51 @@ } } +impl TreeMetadata { + pub fn as_bytes(&self) -> &[u8] { + BytesCast::as_bytes(self) + } +} + impl<'on_disk> Docket<'on_disk> { + /// Generate the identifier for a new data file + /// + /// TODO: support the `HGTEST_UUIDFILE` environment variable. + /// See `mercurial/revlogutils/docket.py` + pub fn new_uid() -> String { + const ID_LENGTH: usize = 8; + let mut id = String::with_capacity(ID_LENGTH); + let mut rng = rand::thread_rng(); + for _ in 0..ID_LENGTH { + // One random hexadecimal digit. + // `unwrap` never panics because `impl Write for String` + // never returns an error. + write!(&mut id, "{:x}", rng.gen_range(0, 16)).unwrap(); + } + id + } + + pub fn serialize( + parents: DirstateParents, + tree_metadata: TreeMetadata, + data_size: u64, + uuid: &[u8], + ) -> Result<Vec<u8>, std::num::TryFromIntError> { + let header = DocketHeader { + marker: *V2_FORMAT_MARKER, + parent_1: parents.p1.pad_to_256_bits(), + parent_2: parents.p2.pad_to_256_bits(), + metadata: tree_metadata, + data_size: u32::try_from(data_size)?.into(), + uuid_size: uuid.len().try_into()?, + }; + let header = header.as_bytes(); + let mut docket = Vec::with_capacity(header.len() + uuid.len()); + docket.extend_from_slice(header); + docket.extend_from_slice(uuid); + Ok(docket) + } + pub fn parents(&self) -> DirstateParents { use crate::Node; let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES]) @@ -555,7 +601,7 @@ pub(super) fn write( dirstate_map: &DirstateMap, can_append: bool, -) -> Result<(Vec<u8>, Vec<u8>, bool), DirstateError> { +) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> { let append = can_append && dirstate_map.write_should_append(); // This ignores the space for paths, and for nodes without an entry. @@ -581,7 +627,7 @@ unused: [0; 4], ignore_patterns_hash: dirstate_map.ignore_patterns_hash, }; - Ok((writer.out, meta.as_bytes().to_vec(), append)) + Ok((writer.out, meta, append)) } struct Writer<'dmap, 'on_disk> {
--- a/rust/hg-core/src/repo.rs Thu Dec 02 12:05:36 2021 +0100 +++ b/rust/hg-core/src/repo.rs Thu Dec 02 15:10:03 2021 +0100 @@ -2,9 +2,10 @@ use crate::config::{Config, ConfigError, ConfigParseError}; use crate::dirstate::DirstateParents; use crate::dirstate_tree::dirstate_map::DirstateMap; +use crate::dirstate_tree::on_disk::Docket as DirstateDocket; use crate::dirstate_tree::owning::OwningDirstateMap; -use crate::errors::HgError; use crate::errors::HgResultExt; +use crate::errors::{HgError, IoResultExt}; use crate::exit_codes; use crate::lock::{try_with_lock_no_wait, LockError}; use crate::manifest::{Manifest, Manifestlog}; @@ -18,6 +19,9 @@ use crate::{DirstateError, Revision}; use std::cell::{Ref, RefCell, RefMut}; use std::collections::HashSet; +use std::io::Seek; +use std::io::SeekFrom; +use std::io::Write as IoWrite; use std::path::{Path, PathBuf}; /// A repository on disk @@ -416,6 +420,70 @@ pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> { Filelog::open(self, path) } + + /// Write to disk any updates that were made through `dirstate_map_mut`. + /// + /// The "wlock" must be held while calling this. + /// See for example `try_with_wlock_no_wait`. + /// + /// TODO: have a `WritableRepo` type only accessible while holding the + /// lock? + pub fn write_dirstate(&self) -> Result<(), DirstateError> { + let map = self.dirstate_map()?; + // TODO: Maintain a `DirstateMap::dirty` flag, and return early here if + // it’s unset + let parents = self.dirstate_parents()?; + let packed_dirstate = if self.has_dirstate_v2() { + let uuid = self.dirstate_data_file_uuid.get_or_init(self)?; + let mut uuid = uuid.as_ref(); + let can_append = uuid.is_some(); + let (data, tree_metadata, append) = map.pack_v2(can_append)?; + if !append { + uuid = None + } + let uuid = if let Some(uuid) = uuid { + std::str::from_utf8(uuid) + .map_err(|_| { + HgError::corrupted("non-UTF-8 dirstate data file ID") + })? + .to_owned() + } else { + DirstateDocket::new_uid() + }; + let data_filename = format!("dirstate.{}", uuid); + let data_filename = self.hg_vfs().join(data_filename); + let mut options = std::fs::OpenOptions::new(); + if append { + options.append(true); + } else { + options.write(true).create_new(true); + } + let data_size = (|| { + // TODO: loop and try another random ID if !append and this + // returns `ErrorKind::AlreadyExists`? Collision chance of two + // random IDs is one in 2**32 + let mut file = options.open(&data_filename)?; + file.write_all(&data)?; + file.flush()?; + // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+ + file.seek(SeekFrom::Current(0)) + })() + .when_writing_file(&data_filename)?; + DirstateDocket::serialize( + parents, + tree_metadata, + data_size, + uuid.as_bytes(), + ) + .map_err(|_: std::num::TryFromIntError| { + HgError::corrupted("overflow in dirstate docket serialization") + })? + } else { + map.pack_v1(parents)? + }; + self.hg_vfs().atomic_write("dirstate", &packed_dirstate)?; + Ok(()) + } } /// Lazily-initialized component of `Repo` with interior mutability
--- a/rust/hg-core/src/revlog/node.rs Thu Dec 02 12:05:36 2021 +0100 +++ b/rust/hg-core/src/revlog/node.rs Thu Dec 02 15:10:03 2021 +0100 @@ -174,6 +174,12 @@ data: self.data, } } + + pub fn pad_to_256_bits(&self) -> [u8; 32] { + let mut bits = [0; 32]; + bits[..NODE_BYTES_LENGTH].copy_from_slice(&self.data); + bits + } } /// The beginning of a binary revision SHA.
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Dec 02 12:05:36 2021 +0100 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Thu Dec 02 15:10:03 2021 +0100 @@ -222,7 +222,7 @@ match result { Ok((packed, tree_metadata, append)) => { let packed = PyBytes::new(py, &packed); - let tree_metadata = PyBytes::new(py, &tree_metadata); + let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes()); let tuple = (packed, tree_metadata, append); Ok(tuple.to_py_object(py).into_object()) },