Mercurial > hg
diff rust/hg-core/src/dirstate_tree/on_disk.rs @ 47674:ff97e793ed36
dirstate-v2: Introduce a docket file
`.hg/dirstate` now only contains some metadata that points to a separate data
file named `.hg/dirstate.{}.d`, where `{}` is a random hexadecimal identifier.
For now, every update creates a new data file and removes the old one, but
later we’ll (usually) append to an existing file.
Separating into two files allows doing the "write to a temporary file then
atomically rename into destination" dance with only a small docket file,
without always rewriting a lot of data.
Differential Revision: https://phab.mercurial-scm.org/D11088
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Thu, 08 Jul 2021 12:18:21 +0200 |
parents | 8851acad5906 |
children | 48aec076b8fb |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Jul 15 17:24:09 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Thu Jul 08 12:18:21 2021 +0200
@@ -19,6 +19,7 @@
 use crate::EntryState;
 use bytes_cast::unaligned::{I32Be, I64Be, U32Be};
 use bytes_cast::BytesCast;
+use format_bytes::format_bytes;
 use std::borrow::Cow;
 use std::convert::TryFrom;
 use std::time::{Duration, SystemTime, UNIX_EPOCH};
@@ -28,18 +29,34 @@
 /// `.hg/requires` already governs which format should be used.
 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
 
+/// Keep space for 256-bit hashes
+const STORED_NODE_ID_BYTES: usize = 32;
+
+/// … even though only 160 bits are used for now, with SHA-1
+const USED_NODE_ID_BYTES: usize = 20;
+
 pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
 pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];
 
+// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
+#[derive(BytesCast)]
+#[repr(C)]
+struct DocketHeader {
+    marker: [u8; V2_FORMAT_MARKER.len()],
+    parent_1: [u8; STORED_NODE_ID_BYTES],
+    parent_2: [u8; STORED_NODE_ID_BYTES],
+    data_size: Size,
+    uuid_size: u8,
+}
+
+pub struct Docket<'on_disk> {
+    header: &'on_disk DocketHeader,
+    uuid: &'on_disk [u8],
+}
+
 #[derive(BytesCast)]
 #[repr(C)]
 struct Header {
-    marker: [u8; V2_FORMAT_MARKER.len()],
-
-    /// `dirstatemap.parents()` in `mercurial/dirstate.py` relies on this
-    /// `parents` field being at this offset, immediately after `marker`.
-    parents: DirstateParents,
-
     root: ChildNodes,
     nodes_with_entry_count: Size,
     nodes_with_copy_source_count: Size,
@@ -172,7 +189,8 @@
 
 /// Make sure that size-affecting changes are made knowingly
 fn _static_assert_size_of() {
-    let _ = std::mem::transmute::<Header, [u8; 88]>;
+    let _ = std::mem::transmute::<DocketHeader, [u8; 81]>;
+    let _ = std::mem::transmute::<Header, [u8; 36]>;
     let _ = std::mem::transmute::<Node, [u8; 49]>;
 }
 
@@ -194,11 +212,31 @@
     }
 }
 
-fn read_header(on_disk: &[u8]) -> Result<&Header, DirstateV2ParseError> {
-    let (header, _) =
-        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
-    if header.marker == *V2_FORMAT_MARKER {
-        Ok(header)
+impl<'on_disk> Docket<'on_disk> {
+    pub fn parents(&self) -> DirstateParents {
+        use crate::Node;
+        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
+            .unwrap()
+            .clone();
+        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
+            .unwrap()
+            .clone();
+        DirstateParents { p1, p2 }
+    }
+
+    pub fn data_filename(&self) -> String {
+        String::from_utf8(format_bytes!(b"dirstate.{}.d", self.uuid)).unwrap()
+    }
+}
+
+pub fn read_docket(
+    on_disk: &[u8],
+) -> Result<Docket<'_>, DirstateV2ParseError> {
+    let (header, uuid) =
+        DocketHeader::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
+    let uuid_size = header.uuid_size as usize;
+    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
+        Ok(Docket { header, uuid })
     } else {
         Err(DirstateV2ParseError)
     }
@@ -206,14 +244,12 @@
 
 pub(super) fn read<'on_disk>(
     on_disk: &'on_disk [u8],
-) -> Result<
-    (DirstateMap<'on_disk>, Option<DirstateParents>),
-    DirstateV2ParseError,
-> {
+) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
     if on_disk.is_empty() {
-        return Ok((DirstateMap::empty(on_disk), None));
+        return Ok(DirstateMap::empty(on_disk));
     }
-    let header = read_header(on_disk)?;
+    let (header, _) =
+        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
     let dirstate_map = DirstateMap {
         on_disk,
         root: dirstate_map::ChildNodes::OnDisk(read_slice::<Node>(
@@ -226,8 +262,7 @@
             .get(),
         ignore_patterns_hash: header.ignore_patterns_hash,
     };
-    let parents = Some(header.parents.clone());
-    Ok((dirstate_map, parents))
+    Ok(dirstate_map)
 }
 
 impl Node {
@@ -447,17 +482,12 @@
         .ok_or_else(|| DirstateV2ParseError)
 }
 
-pub(crate) fn parse_dirstate_parents(
-    on_disk: &[u8],
-) -> Result<&DirstateParents, HgError> {
-    Ok(&read_header(on_disk)?.parents)
-}
-
 pub(crate) fn for_each_tracked_path<'on_disk>(
     on_disk: &'on_disk [u8],
     mut f: impl FnMut(&'on_disk HgPath),
 ) -> Result<(), DirstateV2ParseError> {
-    let header = read_header(on_disk)?;
+    let (header, _) =
+        Header::from_bytes(on_disk).map_err(|_| DirstateV2ParseError)?;
     fn recur<'on_disk>(
         on_disk: &'on_disk [u8],
         nodes: Slice,
@@ -478,7 +508,6 @@
 
 pub(super) fn write(
     dirstate_map: &mut DirstateMap,
-    parents: DirstateParents,
 ) -> Result<Vec<u8>, DirstateError> {
     let header_len = std::mem::size_of::<Header>();
 
@@ -497,8 +526,6 @@
         write_nodes(dirstate_map, dirstate_map.root.as_ref(), &mut out)?;
 
     let header = Header {
-        marker: *V2_FORMAT_MARKER,
-        parents: parents,
         root,
         nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
         nodes_with_copy_source_count: dirstate_map