view rust/hg-core/src/dirstate/on_disk.rs @ 52304:04b9a56c2d25
rust-lib: only export very common types to the top of the crate
This was done very early in the Rust project's lifecycle, when I had very
little Rust experience. Let's keep `DirstateParents` exported at the top since
it pops up in all higher-level code, and turn the rest into explicit imports,
which makes them less confusing and the lib less cluttered.
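In concrete terms, the change moves from crate-root re-exports to explicit module paths. A minimal sketch of the intent (the "before" line is illustrative only, not taken from the actual diff; the "after" lines mirror the imports in the file below):

    // Before (illustrative): most dirstate types re-exported at the crate root.
    use crate::{DirstateEntry, DirstateParents, TruncatedTimestamp};

    // After: only very common types stay at the top of the crate,
    // everything else is imported through its explicit module path.
    use crate::dirstate::entry::{DirstateEntry, TruncatedTimestamp};
    use crate::DirstateParents; // still re-exported at the crate root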
author    Raphaël Gomès <rgomes@octobus.net>
date      Mon, 04 Nov 2024 11:13:05 +0100
parents   b422acba55f1
children  79e8118cd846
//! The "version 2" disk representation of the dirstate
//!
//! See `mercurial/helptext/internals/dirstate-v2.txt`

use crate::dirstate::dirstate_map::DirstateVersion;
use crate::dirstate::dirstate_map::{
    self, DirstateMap, DirstateMapWriteMode, NodeRef,
};
use crate::dirstate::entry::{
    DirstateEntry, DirstateV2Data, TruncatedTimestamp,
};
use crate::dirstate::path_with_basename::WithBasename;
use crate::errors::{HgError, IoResultExt};
use crate::repo::Repo;
use crate::requirements::DIRSTATE_TRACKED_HINT_V1;
use crate::utils::hg_path::HgPath;
use crate::DirstateError;
use crate::DirstateParents;
use bitflags::bitflags;
use bytes_cast::unaligned::{U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use rand::Rng;
use std::borrow::Cow;
use std::fmt::Write;
use uuid::Uuid;

use super::dirstate_map::DirstateIdentity;

/// Added at the start of `.hg/dirstate` when the "v2" format is used.
/// This is a redundant sanity check more than an actual "magic number" since
/// `.hg/requires` already governs which format should be used.
pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";

/// Keep space for 256-bit hashes
const STORED_NODE_ID_BYTES: usize = 32;

/// … even though only 160 bits are used for now, with SHA-1
const USED_NODE_ID_BYTES: usize = 20;

pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20;
pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN];

/// Must match constants of the same names in `mercurial/dirstateutils/v2.py`
const TREE_METADATA_SIZE: usize = 44;
const NODE_SIZE: usize = 44;

/// Make sure that size-affecting changes are made knowingly
#[allow(unused)]
fn static_assert_size_of() {
    let _ = std::mem::transmute::<TreeMetadata, [u8; TREE_METADATA_SIZE]>;
    let _ = std::mem::transmute::<DocketHeader, [u8; TREE_METADATA_SIZE + 81]>;
    let _ = std::mem::transmute::<Node, [u8; NODE_SIZE]>;
}

// Must match `HEADER` in `mercurial/dirstateutils/docket.py`
#[derive(BytesCast)]
#[repr(C)]
struct DocketHeader {
    marker: [u8; V2_FORMAT_MARKER.len()],
    parent_1: [u8; STORED_NODE_ID_BYTES],
    parent_2: [u8; STORED_NODE_ID_BYTES],

    metadata: TreeMetadata,

    /// Counted in bytes
    data_size: Size,

    uuid_size: u8,
}

pub struct Docket<'on_disk> {
    header: &'on_disk DocketHeader,
    pub uuid: &'on_disk [u8],
}

/// Fields are documented in the *Tree metadata in the docket file*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast)]
#[repr(C)]
pub struct TreeMetadata {
    root_nodes: ChildNodes,
    nodes_with_entry_count: Size,
    nodes_with_copy_source_count: Size,
    unreachable_bytes: Size,
    unused: [u8; 4],

    /// See *Optional hash of ignore patterns* section of
    /// `mercurial/helptext/internals/dirstate-v2.txt`
    ignore_patterns_hash: IgnorePatternsHash,
}

/// Fields are documented in the *The data file format*
/// section of `mercurial/helptext/internals/dirstate-v2.txt`
#[derive(BytesCast, Debug)]
#[repr(C)]
pub(super) struct Node {
    full_path: PathSlice,

    /// In bytes from `self.full_path.start`
    base_name_start: PathSize,

    copy_source: OptPathSlice,
    children: ChildNodes,
    pub(super) descendants_with_entry_count: Size,
    pub(super) tracked_descendants_count: Size,
    flags: U16Be,
    size: U32Be,
    mtime: PackedTruncatedTimestamp,
}

bitflags! {
    #[repr(C)]
    struct Flags: u16 {
        const WDIR_TRACKED = 1 << 0;
        const P1_TRACKED = 1 << 1;
        const P2_INFO = 1 << 2;
        const MODE_EXEC_PERM = 1 << 3;
        const MODE_IS_SYMLINK = 1 << 4;
        const HAS_FALLBACK_EXEC = 1 << 5;
        const FALLBACK_EXEC = 1 << 6;
        const HAS_FALLBACK_SYMLINK = 1 << 7;
        const FALLBACK_SYMLINK = 1 << 8;
        const EXPECTED_STATE_IS_MODIFIED = 1 << 9;
        const HAS_MODE_AND_SIZE = 1 << 10;
        const HAS_MTIME = 1 << 11;
        const MTIME_SECOND_AMBIGUOUS = 1 << 12;
        const DIRECTORY = 1 << 13;
        const ALL_UNKNOWN_RECORDED = 1 << 14;
        const ALL_IGNORED_RECORDED = 1 << 15;
    }
}

/// Duration since the Unix epoch
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PackedTruncatedTimestamp {
    truncated_seconds: U32Be,
    nanoseconds: U32Be,
}

/// Counted in bytes from the start of the file
///
/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB.
type Offset = U32Be;

/// Counted in number of items
///
/// NOTE: we choose not to support counting more than 4 billion nodes anywhere.
type Size = U32Be;

/// Counted in bytes
///
/// NOTE: we choose not to support file names/paths longer than 64 KiB.
type PathSize = U16Be;

/// A contiguous sequence of `len` times `Node`, representing the child nodes
/// of either some other node or of the repository root.
///
/// Always sorted by ascending `full_path`, to allow binary search.
/// Since nodes with the same parent nodes also have the same parent path,
/// only the `base_name`s need to be compared during binary search.
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct ChildNodes {
    start: Offset,
    len: Size,
}

/// A `HgPath` of `len` bytes
#[derive(BytesCast, Copy, Clone, Debug)]
#[repr(C)]
struct PathSlice {
    start: Offset,
    len: PathSize,
}

/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes
type OptPathSlice = PathSlice;

/// Unexpected file format found in `.hg/dirstate` with the "v2" format.
///
/// This should only happen if Mercurial is buggy or a repository is corrupted.
#[derive(Debug)]
pub struct DirstateV2ParseError {
    message: String,
}

impl DirstateV2ParseError {
    pub fn new<S: Into<String>>(message: S) -> Self {
        Self {
            message: message.into(),
        }
    }
}

impl From<DirstateV2ParseError> for HgError {
    fn from(e: DirstateV2ParseError) -> Self {
        HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message))
    }
}

impl From<DirstateV2ParseError> for crate::DirstateError {
    fn from(error: DirstateV2ParseError) -> Self {
        HgError::from(error).into()
    }
}

impl TreeMetadata {
    pub fn as_bytes(&self) -> &[u8] {
        BytesCast::as_bytes(self)
    }
}

impl<'on_disk> Docket<'on_disk> {
    /// Generate the identifier for a new data file
    ///
    /// TODO: support the `HGTEST_UUIDFILE` environment variable.
    /// See `mercurial/revlogutils/docket.py`
    pub fn new_uid() -> String {
        const ID_LENGTH: usize = 8;
        let mut id = String::with_capacity(ID_LENGTH);
        let mut rng = rand::thread_rng();
        for _ in 0..ID_LENGTH {
            // One random hexadecimal digit.
            // `unwrap` never panics because `impl Write for String`
            // never returns an error.
            write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap();
        }
        id
    }

    pub fn serialize(
        parents: DirstateParents,
        tree_metadata: TreeMetadata,
        data_size: u64,
        uuid: &[u8],
    ) -> Result<Vec<u8>, std::num::TryFromIntError> {
        let header = DocketHeader {
            marker: *V2_FORMAT_MARKER,
            parent_1: parents.p1.pad_to_256_bits(),
            parent_2: parents.p2.pad_to_256_bits(),
            metadata: tree_metadata,
            data_size: u32::try_from(data_size)?.into(),
            uuid_size: uuid.len().try_into()?,
        };
        let header = header.as_bytes();
        let mut docket = Vec::with_capacity(header.len() + uuid.len());
        docket.extend_from_slice(header);
        docket.extend_from_slice(uuid);
        Ok(docket)
    }

    pub fn parents(&self) -> DirstateParents {
        use crate::Node;
        let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES])
            .unwrap();
        let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES])
            .unwrap();
        DirstateParents { p1, p2 }
    }

    pub fn tree_metadata(&self) -> &[u8] {
        self.header.metadata.as_bytes()
    }

    pub fn data_size(&self) -> usize {
        // This `unwrap` could only panic on a 16-bit CPU
        self.header.data_size.get().try_into().unwrap()
    }

    pub fn data_filename(&self) -> String {
        String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap()
    }
}

pub fn read_docket(
    on_disk: &[u8],
) -> Result<Docket<'_>, DirstateV2ParseError> {
    let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| {
        DirstateV2ParseError::new(format!("when reading docket, {}", e))
    })?;
    let uuid_size = header.uuid_size as usize;
    if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size {
        Ok(Docket { header, uuid })
    } else {
        Err(DirstateV2ParseError::new(
            "invalid format marker or uuid size",
        ))
    }
}

pub(super) fn read<'on_disk>(
    on_disk: &'on_disk [u8],
    metadata: &[u8],
    uuid: Vec<u8>,
    identity: Option<DirstateIdentity>,
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
    if on_disk.is_empty() {
        let mut map = DirstateMap::empty(on_disk);
        map.identity = identity;
        map.old_uuid = Some(uuid);
        map.dirstate_version = DirstateVersion::V2;
        return Ok(map);
    }
    let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| {
        DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e))
    })?;
    let dirstate_map = DirstateMap {
        on_disk,
        root: dirstate_map::ChildNodes::OnDisk(
            read_nodes(on_disk, meta.root_nodes).map_err(|mut e| {
                e.message = format!("{}, when reading root nodes", e.message);
                e
            })?,
        ),
        nodes_with_entry_count: meta.nodes_with_entry_count.get(),
        nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
        ignore_patterns_hash: meta.ignore_patterns_hash,
        unreachable_bytes: meta.unreachable_bytes.get(),
        old_data_size: on_disk.len(),
        old_uuid: Some(uuid),
        identity,
        dirstate_version: DirstateVersion::V2,
        write_mode: DirstateMapWriteMode::Auto,
        use_tracked_hint: false,
    };
    Ok(dirstate_map)
}

impl Node {
    pub(super) fn full_path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        read_hg_path(on_disk, self.full_path)
    }

    pub(super) fn base_name_start(
        &self,
    ) -> Result<usize, DirstateV2ParseError> {
        let start = self.base_name_start.get();
        if start < self.full_path.len.get() {
            let start = usize::from(start);
            Ok(start)
        } else {
            Err(DirstateV2ParseError::new("not enough bytes for base name"))
        }
    }

    pub(super) fn base_name<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk HgPath, DirstateV2ParseError> {
        let full_path = self.full_path(on_disk)?;
        let base_name_start = self.base_name_start()?;
        Ok(HgPath::new(&full_path.as_bytes()[base_name_start..]))
    }

    pub(super) fn path<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::NodeKey<'on_disk>, DirstateV2ParseError> {
        Ok(WithBasename::from_raw_parts(
            Cow::Borrowed(self.full_path(on_disk)?),
            self.base_name_start()?,
        ))
    }

    pub(super) fn has_copy_source(&self) -> bool {
        self.copy_source.start.get() != 0
    }

    pub(super) fn copy_source<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<Option<&'on_disk HgPath>, DirstateV2ParseError> {
        Ok(if self.has_copy_source() {
            Some(read_hg_path(on_disk, self.copy_source)?)
        } else {
            None
        })
    }

    fn flags(&self) -> Flags {
        Flags::from_bits_truncate(self.flags.get())
    }

    fn has_entry(&self) -> bool {
        self.flags().intersects(
            Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO,
        )
    }

    pub(super) fn node_data(
        &self,
    ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(dirstate_map::NodeData::Entry(self.assume_entry()?))
        } else if let Some(mtime) = self.cached_directory_mtime()? {
            Ok(dirstate_map::NodeData::CachedDirectory { mtime })
        } else {
            Ok(dirstate_map::NodeData::None)
        }
    }

    pub(super) fn cached_directory_mtime(
        &self,
    ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
        // For now we do not have code to handle the absence of
        // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is
        // unset.
        if self.flags().contains(Flags::DIRECTORY)
            && self.flags().contains(Flags::HAS_MTIME)
            && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
        {
            Ok(Some(self.mtime()?))
        } else {
            Ok(None)
        }
    }

    fn synthesize_unix_mode(&self) -> u32 {
        // Some platforms' libc don't have the same type (MacOS uses i32 here)
        #[allow(clippy::unnecessary_cast)]
        let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) {
            libc::S_IFLNK as u32
        } else {
            libc::S_IFREG as u32
        };
        let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) {
            0o755
        } else {
            0o644
        };
        file_type | permissions
    }

    fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
        let mut m: TruncatedTimestamp = self.mtime.try_into()?;
        if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
            m.second_ambiguous = true;
        }
        Ok(m)
    }

    fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
        // TODO: convert through raw bits instead?
        let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED);
        let p1_tracked = self.flags().contains(Flags::P1_TRACKED);
        let p2_info = self.flags().contains(Flags::P2_INFO);
        let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some((self.synthesize_unix_mode(), self.size.into()))
        } else {
            None
        };
        let mtime = if self.flags().contains(Flags::HAS_MTIME)
            && !self.flags().contains(Flags::DIRECTORY)
            && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
        {
            Some(self.mtime()?)
        } else {
            None
        };
        let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC)
        {
            Some(self.flags().contains(Flags::FALLBACK_EXEC))
        } else {
            None
        };
        let fallback_symlink =
            if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) {
                Some(self.flags().contains(Flags::FALLBACK_SYMLINK))
            } else {
                None
            };
        Ok(DirstateEntry::from_v2_data(DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size,
            mtime,
            fallback_exec,
            fallback_symlink,
        }))
    }

    pub(super) fn entry(
        &self,
    ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
        if self.has_entry() {
            Ok(Some(self.assume_entry()?))
        } else {
            Ok(None)
        }
    }

    pub(super) fn children<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<&'on_disk [Node], DirstateV2ParseError> {
        read_nodes(on_disk, self.children)
    }

    pub(super) fn to_in_memory_node<'on_disk>(
        &self,
        on_disk: &'on_disk [u8],
    ) -> Result<dirstate_map::Node<'on_disk>, DirstateV2ParseError> {
        Ok(dirstate_map::Node {
            children: dirstate_map::ChildNodes::OnDisk(
                self.children(on_disk)?,
            ),
            copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
            data: self.node_data()?,
            descendants_with_entry_count: self
                .descendants_with_entry_count
                .get(),
            tracked_descendants_count: self.tracked_descendants_count.get(),
        })
    }

    fn from_dirstate_entry(
        entry: &DirstateEntry,
    ) -> (Flags, U32Be, PackedTruncatedTimestamp) {
        let DirstateV2Data {
            wc_tracked,
            p1_tracked,
            p2_info,
            mode_size: mode_size_opt,
            mtime: mtime_opt,
            fallback_exec,
            fallback_symlink,
        } = entry.v2_data();
        // TODO: convert through raw flag bits instead?
        let mut flags = Flags::empty();
        flags.set(Flags::WDIR_TRACKED, wc_tracked);
        flags.set(Flags::P1_TRACKED, p1_tracked);
        flags.set(Flags::P2_INFO, p2_info);
        // Some platforms' libc don't have the same type (MacOS uses i32 here)
        #[allow(clippy::unnecessary_cast)]
        let size = if let Some((m, s)) = mode_size_opt {
            let exec_perm = m & (libc::S_IXUSR as u32) != 0;
            let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32;
            flags.set(Flags::MODE_EXEC_PERM, exec_perm);
            flags.set(Flags::MODE_IS_SYMLINK, is_symlink);
            flags.insert(Flags::HAS_MODE_AND_SIZE);
            s.into()
        } else {
            0.into()
        };
        let mtime = if let Some(m) = mtime_opt {
            flags.insert(Flags::HAS_MTIME);
            if m.second_ambiguous {
                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS);
            };
            m.into()
        } else {
            PackedTruncatedTimestamp::null()
        };
        if let Some(f_exec) = fallback_exec {
            flags.insert(Flags::HAS_FALLBACK_EXEC);
            if f_exec {
                flags.insert(Flags::FALLBACK_EXEC);
            }
        }
        if let Some(f_symlink) = fallback_symlink {
            flags.insert(Flags::HAS_FALLBACK_SYMLINK);
            if f_symlink {
                flags.insert(Flags::FALLBACK_SYMLINK);
            }
        }
        (flags, size, mtime)
    }
}

fn read_hg_path(
    on_disk: &[u8],
    slice: PathSlice,
) -> Result<&HgPath, DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new)
}

fn read_nodes(
    on_disk: &[u8],
    slice: ChildNodes,
) -> Result<&[Node], DirstateV2ParseError> {
    read_slice(on_disk, slice.start, slice.len.get())
}

fn read_slice<T, Len>(
    on_disk: &[u8],
    start: Offset,
    len: Len,
) -> Result<&[T], DirstateV2ParseError>
where
    T: BytesCast,
    Len: TryInto<usize>,
{
    // Either `usize::MAX` would result in an "out of bounds" error since a
    // single `&[u8]` cannot occupy the entire address space.
    let start = start.get().try_into().unwrap_or(usize::MAX);
    let len = len.try_into().unwrap_or(usize::MAX);
    let bytes = match on_disk.get(start..) {
        Some(bytes) => bytes,
        None => {
            return Err(DirstateV2ParseError::new(
                "not enough bytes from disk",
            ))
        }
    };
    T::slice_from_bytes(bytes, len)
        .map_err(|e| {
            DirstateV2ParseError::new(format!("when reading a slice, {}", e))
        })
        .map(|(slice, _rest)| slice)
}

/// Returns new data and metadata, together with whether that data should be
/// appended to the existing data file whose content is at
/// `dirstate_map.on_disk` (true), instead of written to a new data file
/// (false), and the previous size of data on disk.
pub(super) fn write(
    dirstate_map: &DirstateMap,
    write_mode: DirstateMapWriteMode,
) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
    let append = match write_mode {
        DirstateMapWriteMode::Auto => dirstate_map.write_should_append(),
        DirstateMapWriteMode::ForceNewDataFile => false,
        DirstateMapWriteMode::ForceAppend => true,
    };
    if append {
        log::trace!("appending to the dirstate data file");
    } else {
        log::trace!("creating new dirstate data file");
    }

    // This ignores the space for paths, and for nodes without an entry.
    // TODO: better estimate? Skip the `Vec` and write to a file directly?
    let size_guess = std::mem::size_of::<Node>()
        * dirstate_map.nodes_with_entry_count as usize;

    let mut writer = Writer {
        dirstate_map,
        append,
        out: Vec::with_capacity(size_guess),
    };

    let root_nodes = dirstate_map.root.as_ref();
    for node in root_nodes.iter() {
        // Catch some corruptions before we write to disk
        let full_path = node.full_path(dirstate_map.on_disk)?;
        let base_name = node.base_name(dirstate_map.on_disk)?;
        if full_path != base_name {
            let explanation = format!(
                "Dirstate root node '{}' is not at the root",
                full_path
            );
            return Err(HgError::corrupted(explanation).into());
        }
    }
    let root_nodes = writer.write_nodes(root_nodes)?;

    let unreachable_bytes = if append {
        dirstate_map.unreachable_bytes
    } else {
        0
    };
    let meta = TreeMetadata {
        root_nodes,
        nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(),
        nodes_with_copy_source_count: dirstate_map
            .nodes_with_copy_source_count
            .into(),
        unreachable_bytes: unreachable_bytes.into(),
        unused: [0; 4],
        ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
    };
    Ok((writer.out, meta, append, dirstate_map.old_data_size))
}

struct Writer<'dmap, 'on_disk> {
    dirstate_map: &'dmap DirstateMap<'on_disk>,
    append: bool,
    out: Vec<u8>,
}

impl Writer<'_, '_> {
    fn write_nodes(
        &mut self,
        nodes: dirstate_map::ChildNodesRef,
    ) -> Result<ChildNodes, DirstateError> {
        // Reuse already-written nodes if possible
        if self.append {
            if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes {
                let start = self.on_disk_offset_of(nodes_slice).expect(
                    "dirstate-v2 OnDisk nodes not found within on_disk",
                );
                let len = child_nodes_len_from_usize(nodes_slice.len());
                return Ok(ChildNodes { start, len });
            }
        }

        // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has
        // undefined iteration order. Sort to enable binary search in the
        // written file.
        let nodes = nodes.sorted();
        let nodes_len = nodes.len();

        // First accumulate serialized nodes in a `Vec`
        let mut on_disk_nodes = Vec::with_capacity(nodes_len);
        for node in nodes {
            let children = node.children(self.dirstate_map.on_disk)?;
            let full_path = node.full_path(self.dirstate_map.on_disk)?;
            self.check_children(&children, full_path)?;

            let children = self.write_nodes(children)?;
            let full_path = self.write_path(full_path.as_bytes());
            let copy_source = if let Some(source) =
                node.copy_source(self.dirstate_map.on_disk)?
            {
                self.write_path(source.as_bytes())
            } else {
                PathSlice {
                    start: 0.into(),
                    len: 0.into(),
                }
            };
            on_disk_nodes.push(match node {
                NodeRef::InMemory(path, node) => {
                    let (flags, size, mtime) = match &node.data {
                        dirstate_map::NodeData::Entry(entry) => {
                            Node::from_dirstate_entry(entry)
                        }
                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                            // We currently never set an mtime if unknown
                            // files are present.
                            // So if we have an mtime for a directory, we know
                            // there are no unknown files and we blindly set
                            // ALL_UNKNOWN_RECORDED.
                            //
                            // We never set ALL_IGNORED_RECORDED since we
                            // don't track that case currently.
                            let mut flags = Flags::DIRECTORY
                                | Flags::HAS_MTIME
                                | Flags::ALL_UNKNOWN_RECORDED;
                            if mtime.second_ambiguous {
                                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
                            }
                            (flags, 0.into(), (*mtime).into())
                        }
                        dirstate_map::NodeData::None => (
                            Flags::DIRECTORY,
                            0.into(),
                            PackedTruncatedTimestamp::null(),
                        ),
                    };
                    Node {
                        children,
                        copy_source,
                        full_path,
                        base_name_start: u16::try_from(path.base_name_start())
                            // Could only panic for paths over 64 KiB
                            .expect("dirstate-v2 path length overflow")
                            .into(),
                        descendants_with_entry_count: node
                            .descendants_with_entry_count
                            .into(),
                        tracked_descendants_count: node
                            .tracked_descendants_count
                            .into(),
                        flags: flags.bits().into(),
                        size,
                        mtime,
                    }
                }
                NodeRef::OnDisk(node) => Node {
                    children,
                    copy_source,
                    full_path,
                    ..*node
                },
            })
        }
        // … so we can write them contiguously, after writing everything else
        // they refer to.
        let start = self.current_offset();
        let len = child_nodes_len_from_usize(nodes_len);
        self.out.extend(on_disk_nodes.as_bytes());
        Ok(ChildNodes { start, len })
    }

    /// Catch some dirstate corruptions before writing them to disk
    fn check_children(
        &mut self,
        children: &dirstate_map::ChildNodesRef,
        full_path: &HgPath,
    ) -> Result<(), DirstateError> {
        for child in children.iter() {
            let child_full_path = child.full_path(self.dirstate_map.on_disk)?;

            let prefix_length = child_full_path.len()
                // remove the filename
                - child.base_name(self.dirstate_map.on_disk)?.len()
                // remove the slash
                - 1;

            let child_prefix = &child_full_path.as_bytes()[..prefix_length];

            if child_prefix != full_path.as_bytes() {
                let explanation = format!(
                    "dirstate child node's path '{}' \
                     does not start with its parent's path '{}'",
                    child_full_path, full_path,
                );
                return Err(HgError::corrupted(explanation).into());
            }
        }
        Ok(())
    }

    /// If the given slice of items is within `on_disk`, returns its offset
    /// from the start of `on_disk`.
    fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>
    where
        T: BytesCast,
    {
        fn address_range(slice: &[u8]) -> std::ops::RangeInclusive<usize> {
            let start = slice.as_ptr() as usize;
            let end = start + slice.len();
            start..=end
        }
        let slice_addresses = address_range(slice.as_bytes());
        let on_disk_addresses = address_range(self.dirstate_map.on_disk);
        if on_disk_addresses.contains(slice_addresses.start())
            && on_disk_addresses.contains(slice_addresses.end())
        {
            let offset = slice_addresses.start() - on_disk_addresses.start();
            Some(offset_from_usize(offset))
        } else {
            None
        }
    }

    fn current_offset(&mut self) -> Offset {
        let mut offset = self.out.len();
        if self.append {
            offset += self.dirstate_map.on_disk.len()
        }
        offset_from_usize(offset)
    }

    fn write_path(&mut self, slice: &[u8]) -> PathSlice {
        let len = path_len_from_usize(slice.len());
        // Reuse an already-written path if possible
        if self.append {
            if let Some(start) = self.on_disk_offset_of(slice) {
                return PathSlice { start, len };
            }
        }
        let start = self.current_offset();
        self.out.extend(slice.as_bytes());
        PathSlice { start, len }
    }
}

fn offset_from_usize(x: usize) -> Offset {
    u32::try_from(x)
        // Could only panic for a dirstate file larger than 4 GiB
        .expect("dirstate-v2 offset overflow")
        .into()
}

fn child_nodes_len_from_usize(x: usize) -> Size {
    u32::try_from(x)
        // Could only panic with over 4 billion nodes
        .expect("dirstate-v2 slice length overflow")
        .into()
}

fn path_len_from_usize(x: usize) -> PathSize {
    u16::try_from(x)
        // Could only panic for paths over 64 KiB
        .expect("dirstate-v2 path length overflow")
        .into()
}

impl From<TruncatedTimestamp> for PackedTruncatedTimestamp {
    fn from(timestamp: TruncatedTimestamp) -> Self {
        Self {
            truncated_seconds: timestamp.truncated_seconds().into(),
            nanoseconds: timestamp.nanoseconds().into(),
        }
    }
}

impl TryFrom<PackedTruncatedTimestamp> for TruncatedTimestamp {
    type Error = DirstateV2ParseError;
    fn try_from(
        timestamp: PackedTruncatedTimestamp,
    ) -> Result<Self, Self::Error> {
        Self::from_already_truncated(
            timestamp.truncated_seconds.get(),
            timestamp.nanoseconds.get(),
            false,
        )
    }
}

impl PackedTruncatedTimestamp {
    fn null() -> Self {
        Self {
            truncated_seconds: 0.into(),
            nanoseconds: 0.into(),
        }
    }
}

/// Write a new tracked key to disk.
/// See `format.use-dirstate-tracked-hint` config help for more details.
pub fn write_tracked_key(repo: &Repo) -> Result<(), HgError> {
    // TODO move this to the dirstate itself once it grows a `dirty` flag and
    // can reason about which context it needs to write this in.
    // For now, only this fast-path needs to think about the tracked hint.
    // Use [`crate::dirstate::dirstate_map::DirstateMap::use_tracked_hint`]
    // instead of looking at the requirements once refactored.
    if !repo.requirements().contains(DIRSTATE_TRACKED_HINT_V1) {
        return Ok(());
    }
    // TODO use `hg_vfs` once the `InnerRevlog` is in.
    let path = repo
        .working_directory_path()
        .join(".hg/dirstate-tracked-hint");
    std::fs::write(&path, Uuid::new_v4().as_bytes()).when_writing_file(&path)
}
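For orientation, a minimal usage sketch (not part of this file): it assumes it lives somewhere inside hg-core where `crate::dirstate::on_disk` and the crate-root `DirstateParents` re-export are visible, and it only uses the public items defined above (`read_docket` and the `Docket` accessors).

    use crate::dirstate::on_disk::{read_docket, DirstateV2ParseError};
    use crate::DirstateParents;

    /// Hypothetical helper: parse the small `.hg/dirstate` docket and report
    /// what it points to, without touching the `dirstate.<uuid>` data file.
    fn docket_summary(
        dirstate_bytes: &[u8],
    ) -> Result<(DirstateParents, String, usize), DirstateV2ParseError> {
        let docket = read_docket(dirstate_bytes)?;
        // The docket holds the parents, the tree metadata, and a pointer
        // (uuid + size) to the data file that contains the actual nodes.
        Ok((docket.parents(), docket.data_filename(), docket.data_size()))
    }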