# HG changeset patch # User Raphaël Gomès # Date 1730714458 -3600 # Node ID db065b33fa56ad35b4b653ddbf0ee743f973ad0f # Parent 7ffc715526629c764a31f3bf12ca4b45cbe9d01f rust-dirstate: merge `dirstate_tree` module into `dirstate` The historical reasoning for `dirstate_tree` existing in the first place is that a new approach was needed for the tree-like dirstate and it was easier to start somewhat fresh. Now that the former dirstate is (long) gone, we can merge those two modules to avoid the confusion that even the module creators sometimes get. diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate.rs --- a/rust/hg-core/src/dirstate.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-core/src/dirstate.rs Mon Nov 04 11:00:58 2024 +0100 @@ -5,15 +5,19 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. -use crate::dirstate_tree::on_disk::DirstateV2ParseError; +use crate::dirstate::on_disk::DirstateV2ParseError; use crate::revlog::node::NULL_NODE; use crate::revlog::Node; use crate::utils::hg_path::HgPath; use bytes_cast::BytesCast; pub mod dirs_multiset; +pub mod dirstate_map; pub mod entry; +pub mod on_disk; +pub mod owning; pub mod parsers; +pub mod path_with_basename; pub mod status; pub use self::entry::*; diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate/dirs_multiset.rs --- a/rust/hg-core/src/dirstate/dirs_multiset.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-core/src/dirstate/dirs_multiset.rs Mon Nov 04 11:00:58 2024 +0100 @@ -8,7 +8,7 @@ //! A multiset of directory names. //! //! Used to counts the references to directories in a manifest or dirstate. 
-use crate::dirstate_tree::on_disk::DirstateV2ParseError; +use crate::dirstate::on_disk::DirstateV2ParseError; use crate::{ utils::{ files, diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate/dirstate_map.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/dirstate/dirstate_map.rs Mon Nov 04 11:00:58 2024 +0100 @@ -0,0 +1,2123 @@ +use bytes_cast::BytesCast; +use std::borrow::Cow; +use std::fs::Metadata; +use std::os::unix::fs::MetadataExt; +use std::path::PathBuf; + +use super::on_disk; +use super::on_disk::DirstateV2ParseError; +use super::owning::OwningDirstateMap; +use super::path_with_basename::WithBasename; +use crate::dirstate::parsers::pack_entry; +use crate::dirstate::parsers::packed_entry_size; +use crate::dirstate::parsers::parse_dirstate_entries; +use crate::dirstate::CopyMapIter; +use crate::dirstate::DirstateV2Data; +use crate::dirstate::ParentFileData; +use crate::dirstate::StateMapIter; +use crate::dirstate::TruncatedTimestamp; +use crate::matchers::Matcher; +use crate::utils::filter_map_results; +use crate::utils::hg_path::{HgPath, HgPathBuf}; +use crate::DirstateEntry; +use crate::DirstateError; +use crate::DirstateMapError; +use crate::DirstateParents; +use crate::DirstateStatus; +use crate::FastHashbrownMap as FastHashMap; +use crate::PatternFileWarning; +use crate::StatusError; +use crate::StatusOptions; + +/// Append to an existing data file if the amount of unreachable data (not used +/// anymore) is less than this fraction of the total amount of existing data. 
+const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5; + +#[derive(Debug, PartialEq, Eq)] +/// Version of the on-disk format +pub enum DirstateVersion { + V1, + V2, +} + +#[derive(Debug, PartialEq, Eq)] +pub enum DirstateMapWriteMode { + Auto, + ForceNewDataFile, + ForceAppend, +} + +/// Used to detect out-of-process changes in the dirstate +#[derive(Debug, Copy, Clone)] +pub struct DirstateIdentity { + pub mode: u32, + pub dev: u64, + pub ino: u64, + pub nlink: u64, + pub uid: u32, + pub gid: u32, + pub size: u64, + pub mtime: i64, + pub mtime_nsec: i64, + pub ctime: i64, + pub ctime_nsec: i64, +} + +impl From for DirstateIdentity { + fn from(value: Metadata) -> Self { + Self { + mode: value.mode(), + dev: value.dev(), + ino: value.ino(), + nlink: value.nlink(), + uid: value.uid(), + gid: value.gid(), + size: value.size(), + mtime: value.mtime(), + mtime_nsec: value.mtime_nsec(), + ctime: value.ctime(), + ctime_nsec: value.ctime_nsec(), + } + } +} + +impl PartialEq for DirstateIdentity { + fn eq(&self, other: &Self) -> bool { + // Some platforms return 0 when they have no support for nanos. + // This shouldn't be a problem in practice because of how highly + // unlikely it is that we actually get exactly 0 nanos, and worst + // case scenario, we don't write out the dirstate in a non-wlocked + // situation like status. 
+ let mtime_nanos_equal = (self.mtime_nsec == 0 + || other.mtime_nsec == 0) + || self.mtime_nsec == other.mtime_nsec; + let ctime_nanos_equal = (self.ctime_nsec == 0 + || other.ctime_nsec == 0) + || self.ctime_nsec == other.ctime_nsec; + + self.mode == other.mode + && self.dev == other.dev + && self.ino == other.ino + && self.nlink == other.nlink + && self.uid == other.uid + && self.gid == other.gid + && self.size == other.size + && self.mtime == other.mtime + && mtime_nanos_equal + && self.ctime == other.ctime + && ctime_nanos_equal + } +} + +#[derive(Debug)] +pub struct DirstateMap<'on_disk> { + /// Contents of the `.hg/dirstate` file + pub(super) on_disk: &'on_disk [u8], + + pub(super) root: ChildNodes<'on_disk>, + + /// Number of nodes anywhere in the tree that have `.entry.is_some()`. + pub(super) nodes_with_entry_count: u32, + + /// Number of nodes anywhere in the tree that have + /// `.copy_source.is_some()`. + pub(super) nodes_with_copy_source_count: u32, + + /// See on_disk::Header + pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash, + + /// How many bytes of `on_disk` are not used anymore + pub(super) unreachable_bytes: u32, + + /// Size of the data used to first load this `DirstateMap`. Used in case + /// we need to write some new metadata, but no new data on disk, + /// as well as to detect writes that have happened in another process + /// since first read. + pub(super) old_data_size: usize, + + /// UUID used when first loading this `DirstateMap`. Used to check if + /// the UUID has been changed by another process since first read. + /// Can be `None` if using dirstate v1 or if it's a brand new dirstate. + pub(super) old_uuid: Option>, + + /// Identity of the dirstate file (for dirstate-v1) or the docket file + /// (v2). Used to detect if the file has changed from another process. + /// Since it's always written atomically, we can compare the inode to + /// check the file identity. 
+ /// + /// TODO On non-Unix systems, something like hashing is a possibility? + pub(super) identity: Option, + + pub(super) dirstate_version: DirstateVersion, + + /// Controlled by config option `devel.dirstate.v2.data_update_mode` + pub(super) write_mode: DirstateMapWriteMode, + + /// Controlled by config option `format.use-dirstate-tracked-hint` + pub(super) use_tracked_hint: bool, +} + +/// Using a plain `HgPathBuf` of the full path from the repository root as a +/// map key would also work: all paths in a given map have the same parent +/// path, so comparing full paths gives the same result as comparing base +/// names. However `HashMap` would waste time always re-hashing the same +/// string prefix. +pub(super) type NodeKey<'on_disk> = WithBasename>; + +/// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned +/// for on-disk nodes that don’t actually have a `Cow` to borrow. +#[derive(Debug)] +pub(super) enum BorrowedPath<'tree, 'on_disk> { + InMemory(&'tree HgPathBuf), + OnDisk(&'on_disk HgPath), +} + +#[derive(Debug)] +pub(super) enum ChildNodes<'on_disk> { + InMemory(FastHashMap, Node<'on_disk>>), + OnDisk(&'on_disk [on_disk::Node]), +} + +#[derive(Debug)] +pub(super) enum ChildNodesRef<'tree, 'on_disk> { + InMemory(&'tree FastHashMap, Node<'on_disk>>), + OnDisk(&'on_disk [on_disk::Node]), +} + +#[derive(Debug)] +pub(super) enum NodeRef<'tree, 'on_disk> { + InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>), + OnDisk(&'on_disk on_disk::Node), +} + +impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> { + pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> { + match *self { + BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()), + BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk), + } + } +} + +impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> { + type Target = HgPath; + + fn deref(&self) -> &HgPath { + match *self { + BorrowedPath::InMemory(in_memory) => in_memory, + 
BorrowedPath::OnDisk(on_disk) => on_disk, + } + } +} + +impl Default for ChildNodes<'_> { + fn default() -> Self { + ChildNodes::InMemory(Default::default()) + } +} + +impl<'on_disk> ChildNodes<'on_disk> { + pub(super) fn as_ref<'tree>( + &'tree self, + ) -> ChildNodesRef<'tree, 'on_disk> { + match self { + ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes), + ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes), + } + } + + pub(super) fn is_empty(&self) -> bool { + match self { + ChildNodes::InMemory(nodes) => nodes.is_empty(), + ChildNodes::OnDisk(nodes) => nodes.is_empty(), + } + } + + fn make_mut( + &mut self, + on_disk: &'on_disk [u8], + unreachable_bytes: &mut u32, + ) -> Result< + &mut FastHashMap, Node<'on_disk>>, + DirstateV2ParseError, + > { + match self { + ChildNodes::InMemory(nodes) => Ok(nodes), + ChildNodes::OnDisk(nodes) => { + *unreachable_bytes += + std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32; + let nodes = nodes + .iter() + .map(|node| { + Ok(( + node.path(on_disk)?, + node.to_in_memory_node(on_disk)?, + )) + }) + .collect::>()?; + *self = ChildNodes::InMemory(nodes); + match self { + ChildNodes::InMemory(nodes) => Ok(nodes), + ChildNodes::OnDisk(_) => unreachable!(), + } + } + } + } +} + +impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> { + pub(super) fn get( + &self, + base_name: &HgPath, + on_disk: &'on_disk [u8], + ) -> Result>, DirstateV2ParseError> { + match self { + ChildNodesRef::InMemory(nodes) => Ok(nodes + .get_key_value(base_name) + .map(|(k, v)| NodeRef::InMemory(k, v))), + ChildNodesRef::OnDisk(nodes) => { + let mut parse_result = Ok(()); + let search_result = nodes.binary_search_by(|node| { + match node.base_name(on_disk) { + Ok(node_base_name) => node_base_name.cmp(base_name), + Err(e) => { + parse_result = Err(e); + // Dummy comparison result, `search_result` won’t + // be used since `parse_result` is an error + std::cmp::Ordering::Equal + } + } + }); + parse_result.map(|()| { + 
search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i])) + }) + } + } + } + + /// Iterate in undefined order + pub(super) fn iter( + &self, + ) -> impl Iterator> { + match self { + ChildNodesRef::InMemory(nodes) => itertools::Either::Left( + nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)), + ), + ChildNodesRef::OnDisk(nodes) => { + itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk)) + } + } + } + + /// Iterate in parallel in undefined order + pub(super) fn par_iter( + &self, + ) -> impl rayon::iter::ParallelIterator> + { + use rayon::prelude::*; + match self { + ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left( + nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)), + ), + ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right( + nodes.par_iter().map(NodeRef::OnDisk), + ), + } + } + + pub(super) fn sorted(&self) -> Vec> { + match self { + ChildNodesRef::InMemory(nodes) => { + let mut vec: Vec<_> = nodes + .iter() + .map(|(k, v)| NodeRef::InMemory(k, v)) + .collect(); + fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath { + match node { + NodeRef::InMemory(path, _node) => path.base_name(), + NodeRef::OnDisk(_) => unreachable!(), + } + } + // `sort_unstable_by_key` doesn’t allow keys borrowing from the + // value: https://github.com/rust-lang/rust/issues/34162 + vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b))); + vec + } + ChildNodesRef::OnDisk(nodes) => { + // Nodes on disk are already sorted + nodes.iter().map(NodeRef::OnDisk).collect() + } + } + } +} + +impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> { + pub(super) fn full_path( + &self, + on_disk: &'on_disk [u8], + ) -> Result<&'tree HgPath, DirstateV2ParseError> { + match self { + NodeRef::InMemory(path, _node) => Ok(path.full_path()), + NodeRef::OnDisk(node) => node.full_path(on_disk), + } + } + + /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk, + /// HgPath>` detached from `'tree` + pub(super) fn full_path_borrowed( + &self, + on_disk: &'on_disk [u8], + ) 
-> Result, DirstateV2ParseError> { + match self { + NodeRef::InMemory(path, _node) => match path.full_path() { + Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)), + Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)), + }, + NodeRef::OnDisk(node) => { + Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?)) + } + } + } + + pub(super) fn base_name( + &self, + on_disk: &'on_disk [u8], + ) -> Result<&'tree HgPath, DirstateV2ParseError> { + match self { + NodeRef::InMemory(path, _node) => Ok(path.base_name()), + NodeRef::OnDisk(node) => node.base_name(on_disk), + } + } + + pub(super) fn children( + &self, + on_disk: &'on_disk [u8], + ) -> Result, DirstateV2ParseError> { + match self { + NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()), + NodeRef::OnDisk(node) => { + Ok(ChildNodesRef::OnDisk(node.children(on_disk)?)) + } + } + } + + pub(super) fn has_copy_source(&self) -> bool { + match self { + NodeRef::InMemory(_path, node) => node.copy_source.is_some(), + NodeRef::OnDisk(node) => node.has_copy_source(), + } + } + + pub(super) fn copy_source( + &self, + on_disk: &'on_disk [u8], + ) -> Result, DirstateV2ParseError> { + match self { + NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()), + NodeRef::OnDisk(node) => node.copy_source(on_disk), + } + } + /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk, + /// HgPath>` detached from `'tree` + pub(super) fn copy_source_borrowed( + &self, + on_disk: &'on_disk [u8], + ) -> Result>, DirstateV2ParseError> + { + Ok(match self { + NodeRef::InMemory(_path, node) => { + node.copy_source.as_ref().map(|source| match source { + Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk), + Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory), + }) + } + NodeRef::OnDisk(node) => { + node.copy_source(on_disk)?.map(BorrowedPath::OnDisk) + } + }) + } + + pub(super) fn entry( + &self, + ) -> Result, DirstateV2ParseError> { + match self { + NodeRef::InMemory(_path, node) => { 
+ Ok(node.data.as_entry().copied()) + } + NodeRef::OnDisk(node) => node.entry(), + } + } + + pub(super) fn cached_directory_mtime( + &self, + ) -> Result, DirstateV2ParseError> { + match self { + NodeRef::InMemory(_path, node) => Ok(match node.data { + NodeData::CachedDirectory { mtime } => Some(mtime), + _ => None, + }), + NodeRef::OnDisk(node) => node.cached_directory_mtime(), + } + } + + pub(super) fn descendants_with_entry_count(&self) -> u32 { + match self { + NodeRef::InMemory(_path, node) => { + node.descendants_with_entry_count + } + NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(), + } + } + + pub(super) fn tracked_descendants_count(&self) -> u32 { + match self { + NodeRef::InMemory(_path, node) => node.tracked_descendants_count, + NodeRef::OnDisk(node) => node.tracked_descendants_count.get(), + } + } +} + +/// Represents a file or a directory +#[derive(Default, Debug)] +pub(super) struct Node<'on_disk> { + pub(super) data: NodeData, + + pub(super) copy_source: Option>, + + pub(super) children: ChildNodes<'on_disk>, + + /// How many (non-inclusive) descendants of this node have an entry. + pub(super) descendants_with_entry_count: u32, + + /// How many (non-inclusive) descendants of this node have an entry whose + /// state is "tracked". 
+ pub(super) tracked_descendants_count: u32, +} + +#[derive(Debug, Default)] +pub(super) enum NodeData { + Entry(DirstateEntry), + CachedDirectory { + mtime: TruncatedTimestamp, + }, + #[default] + None, +} + +impl NodeData { + fn has_entry(&self) -> bool { + matches!(self, NodeData::Entry(_)) + } + + fn as_entry(&self) -> Option<&DirstateEntry> { + match self { + NodeData::Entry(entry) => Some(entry), + _ => None, + } + } + + fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> { + match self { + NodeData::Entry(entry) => Some(entry), + _ => None, + } + } +} + +impl<'on_disk> DirstateMap<'on_disk> { + pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { + Self { + on_disk, + root: ChildNodes::default(), + nodes_with_entry_count: 0, + nodes_with_copy_source_count: 0, + ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN], + unreachable_bytes: 0, + old_data_size: 0, + old_uuid: None, + identity: None, + dirstate_version: DirstateVersion::V1, + write_mode: DirstateMapWriteMode::Auto, + use_tracked_hint: false, + } + } + + #[logging_timer::time("trace")] + pub fn new_v2( + on_disk: &'on_disk [u8], + data_size: usize, + metadata: &[u8], + uuid: Vec, + identity: Option, + ) -> Result { + if let Some(data) = on_disk.get(..data_size) { + Ok(on_disk::read(data, metadata, uuid, identity)?) 
+ } else { + Err(DirstateV2ParseError::new("not enough bytes on disk").into()) + } + } + + #[logging_timer::time("trace")] + pub fn new_v1( + on_disk: &'on_disk [u8], + identity: Option, + ) -> Result<(Self, Option), DirstateError> { + let mut map = Self::empty(on_disk); + map.identity = identity; + + if map.on_disk.is_empty() { + return Ok((map, None)); + } + + let parents = parse_dirstate_entries( + map.on_disk, + |path, entry, copy_source| { + let tracked = entry.tracked(); + let node = Self::get_or_insert_node_inner( + map.on_disk, + &mut map.unreachable_bytes, + &mut map.root, + path, + WithBasename::to_cow_borrowed, + |ancestor| { + if tracked { + ancestor.tracked_descendants_count += 1 + } + ancestor.descendants_with_entry_count += 1 + }, + )?; + assert!( + !node.data.has_entry(), + "duplicate dirstate entry in read" + ); + assert!( + node.copy_source.is_none(), + "duplicate dirstate entry in read" + ); + node.data = NodeData::Entry(*entry); + node.copy_source = copy_source.map(Cow::Borrowed); + map.nodes_with_entry_count += 1; + if copy_source.is_some() { + map.nodes_with_copy_source_count += 1 + } + Ok(()) + }, + )?; + let parents = Some(*parents); + + Ok((map, parents)) + } + + /// Assuming dirstate-v2 format, returns whether the next write should + /// append to the existing data file that contains `self.on_disk` (true), + /// or create a new data file from scratch (false). 
+ pub(super) fn write_should_append(&self) -> bool { + match self.write_mode { + DirstateMapWriteMode::ForceAppend => true, + DirstateMapWriteMode::ForceNewDataFile => false, + DirstateMapWriteMode::Auto => { + let ratio = + self.unreachable_bytes as f32 / self.on_disk.len() as f32; + ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO + } + } + } + + fn get_node<'tree>( + &'tree self, + path: &HgPath, + ) -> Result>, DirstateV2ParseError> { + let mut children = self.root.as_ref(); + let mut components = path.components(); + let mut component = + components.next().expect("expected at least one components"); + loop { + if let Some(child) = children.get(component, self.on_disk)? { + if let Some(next_component) = components.next() { + component = next_component; + children = child.children(self.on_disk)?; + } else { + return Ok(Some(child)); + } + } else { + return Ok(None); + } + } + } + + pub fn has_node( + &self, + path: &HgPath, + ) -> Result { + let node = self.get_node(path)?; + Ok(node.is_some()) + } + + /// Returns a mutable reference to the node at `path` if it exists + /// + /// `each_ancestor` is a callback that is called for each ancestor node + /// when descending the tree. It is used to keep the different counters + /// of the `DirstateMap` up-to-date. + fn get_node_mut<'tree>( + &'tree mut self, + path: &HgPath, + each_ancestor: impl FnMut(&mut Node), + ) -> Result>, DirstateV2ParseError> { + Self::get_node_mut_inner( + self.on_disk, + &mut self.unreachable_bytes, + &mut self.root, + path, + each_ancestor, + ) + } + + /// Lower-level version of `get_node_mut`. + /// + /// This takes `root` instead of `&mut self` so that callers can mutate + /// other fields while the returned borrow is still valid. + /// + /// `each_ancestor` is a callback that is called for each ancestor node + /// when descending the tree. It is used to keep the different counters + /// of the `DirstateMap` up-to-date. 
+ fn get_node_mut_inner<'tree>( + on_disk: &'on_disk [u8], + unreachable_bytes: &mut u32, + root: &'tree mut ChildNodes<'on_disk>, + path: &HgPath, + mut each_ancestor: impl FnMut(&mut Node), + ) -> Result>, DirstateV2ParseError> { + let mut children = root; + let mut components = path.components(); + let mut component = + components.next().expect("expected at least one components"); + loop { + if let Some(child) = children + .make_mut(on_disk, unreachable_bytes)? + .get_mut(component) + { + if let Some(next_component) = components.next() { + each_ancestor(child); + component = next_component; + children = &mut child.children; + } else { + return Ok(Some(child)); + } + } else { + return Ok(None); + } + } + } + + /// Get a mutable reference to the node at `path`, creating it if it does + /// not exist. + /// + /// `each_ancestor` is a callback that is called for each ancestor node + /// when descending the tree. It is used to keep the different counters + /// of the `DirstateMap` up-to-date. + fn get_or_insert_node<'tree, 'path>( + &'tree mut self, + path: &'path HgPath, + each_ancestor: impl FnMut(&mut Node), + ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { + Self::get_or_insert_node_inner( + self.on_disk, + &mut self.unreachable_bytes, + &mut self.root, + path, + WithBasename::to_cow_owned, + each_ancestor, + ) + } + + /// Lower-level version of `get_or_insert_node_inner`, which is used when + /// parsing disk data to remove allocations for new nodes. 
+ fn get_or_insert_node_inner<'tree, 'path>( + on_disk: &'on_disk [u8], + unreachable_bytes: &mut u32, + root: &'tree mut ChildNodes<'on_disk>, + path: &'path HgPath, + to_cow: impl Fn( + WithBasename<&'path HgPath>, + ) -> WithBasename>, + mut each_ancestor: impl FnMut(&mut Node), + ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { + let mut child_nodes = root; + let mut inclusive_ancestor_paths = + WithBasename::inclusive_ancestors_of(path); + let mut ancestor_path = inclusive_ancestor_paths + .next() + .expect("expected at least one inclusive ancestor"); + loop { + let (_, child_node) = child_nodes + .make_mut(on_disk, unreachable_bytes)? + .raw_entry_mut() + .from_key(ancestor_path.base_name()) + .or_insert_with(|| (to_cow(ancestor_path), Node::default())); + if let Some(next) = inclusive_ancestor_paths.next() { + each_ancestor(child_node); + ancestor_path = next; + child_nodes = &mut child_node.children; + } else { + return Ok(child_node); + } + } + } + + #[allow(clippy::too_many_arguments)] + fn reset_state( + &mut self, + filename: &HgPath, + old_entry_opt: Option, + wc_tracked: bool, + p1_tracked: bool, + p2_info: bool, + has_meaningful_mtime: bool, + parent_file_data_opt: Option, + ) -> Result<(), DirstateError> { + let (had_entry, was_tracked) = match old_entry_opt { + Some(old_entry) => (true, old_entry.tracked()), + None => (false, false), + }; + let node = self.get_or_insert_node(filename, |ancestor| { + if !had_entry { + ancestor.descendants_with_entry_count += 1; + } + if was_tracked { + if !wc_tracked { + ancestor.tracked_descendants_count = ancestor + .tracked_descendants_count + .checked_sub(1) + .expect("tracked count to be >= 0"); + } + } else if wc_tracked { + ancestor.tracked_descendants_count += 1; + } + })?; + + let v2_data = if let Some(parent_file_data) = parent_file_data_opt { + DirstateV2Data { + wc_tracked, + p1_tracked, + p2_info, + mode_size: parent_file_data.mode_size, + mtime: if has_meaningful_mtime { + 
parent_file_data.mtime + } else { + None + }, + ..Default::default() + } + } else { + DirstateV2Data { + wc_tracked, + p1_tracked, + p2_info, + ..Default::default() + } + }; + node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data)); + if !had_entry { + self.nodes_with_entry_count += 1; + } + Ok(()) + } + + fn set_tracked( + &mut self, + filename: &HgPath, + old_entry_opt: Option, + ) -> Result { + let was_tracked = old_entry_opt.map_or(false, |e| e.tracked()); + let had_entry = old_entry_opt.is_some(); + let tracked_count_increment = u32::from(!was_tracked); + let mut new = false; + + let node = self.get_or_insert_node(filename, |ancestor| { + if !had_entry { + ancestor.descendants_with_entry_count += 1; + } + + ancestor.tracked_descendants_count += tracked_count_increment; + })?; + if let Some(old_entry) = old_entry_opt { + let mut e = old_entry; + if e.tracked() { + // XXX + // This is probably overkill for more case, but we need this to + // fully replace the `normallookup` call with `set_tracked` + // one. Consider smoothing this in the future. + e.set_possibly_dirty(); + } else { + new = true; + e.set_tracked(); + } + node.data = NodeData::Entry(e) + } else { + node.data = NodeData::Entry(DirstateEntry::new_tracked()); + self.nodes_with_entry_count += 1; + new = true; + }; + Ok(new) + } + + /// Set a node as untracked in the dirstate. + /// + /// It is the responsibility of the caller to remove the copy source and/or + /// the entry itself if appropriate. + /// + /// # Panics + /// + /// Panics if the node does not exist. + fn set_untracked( + &mut self, + filename: &HgPath, + old_entry: DirstateEntry, + ) -> Result<(), DirstateV2ParseError> { + let node = self + .get_node_mut(filename, |ancestor| { + ancestor.tracked_descendants_count = ancestor + .tracked_descendants_count + .checked_sub(1) + .expect("tracked_descendants_count should be >= 0"); + })? 
+ .expect("node should exist"); + let mut new_entry = old_entry; + new_entry.set_untracked(); + node.data = NodeData::Entry(new_entry); + Ok(()) + } + + /// Set a node as clean in the dirstate. + /// + /// It is the responsibility of the caller to remove the copy source. + /// + /// # Panics + /// + /// Panics if the node does not exist. + fn set_clean( + &mut self, + filename: &HgPath, + old_entry: DirstateEntry, + mode: u32, + size: u32, + mtime: TruncatedTimestamp, + ) -> Result<(), DirstateError> { + let node = self + .get_node_mut(filename, |ancestor| { + if !old_entry.tracked() { + ancestor.tracked_descendants_count += 1; + } + })? + .expect("node should exist"); + let mut new_entry = old_entry; + new_entry.set_clean(mode, size, mtime); + node.data = NodeData::Entry(new_entry); + Ok(()) + } + + /// Set a node as possibly dirty in the dirstate. + /// + /// # Panics + /// + /// Panics if the node does not exist. + fn set_possibly_dirty( + &mut self, + filename: &HgPath, + ) -> Result<(), DirstateError> { + let node = self + .get_node_mut(filename, |_ancestor| {})? + .expect("node should exist"); + let entry = node.data.as_entry_mut().expect("entry should exist"); + entry.set_possibly_dirty(); + node.data = NodeData::Entry(*entry); + Ok(()) + } + + /// Clears the cached mtime for the (potential) folder at `path`. + pub(super) fn clear_cached_mtime( + &mut self, + path: &HgPath, + ) -> Result<(), DirstateV2ParseError> { + let node = match self.get_node_mut(path, |_ancestor| {})? { + Some(node) => node, + None => return Ok(()), + }; + if let NodeData::CachedDirectory { .. } = &node.data { + node.data = NodeData::None + } + Ok(()) + } + + /// Sets the cached mtime for the (potential) folder at `path`. + pub(super) fn set_cached_mtime( + &mut self, + path: &HgPath, + mtime: TruncatedTimestamp, + ) -> Result<(), DirstateV2ParseError> { + let node = match self.get_node_mut(path, |_ancestor| {})? 
{ + Some(node) => node, + None => return Ok(()), + }; + match &node.data { + NodeData::Entry(_) => {} // Don’t overwrite an entry + NodeData::CachedDirectory { .. } | NodeData::None => { + node.data = NodeData::CachedDirectory { mtime } + } + } + Ok(()) + } + + fn iter_nodes<'tree>( + &'tree self, + ) -> impl Iterator< + Item = Result, DirstateV2ParseError>, + > + 'tree { + // Depth first tree traversal. + // + // If we could afford internal iteration and recursion, + // this would look like: + // + // ``` + // fn traverse_children( + // children: &ChildNodes, + // each: &mut impl FnMut(&Node), + // ) { + // for child in children.values() { + // traverse_children(&child.children, each); + // each(child); + // } + // } + // ``` + // + // However we want an external iterator and therefore can’t use the + // call stack. Use an explicit stack instead: + let mut stack = Vec::new(); + let mut iter = self.root.as_ref().iter(); + std::iter::from_fn(move || { + while let Some(child_node) = iter.next() { + let children = match child_node.children(self.on_disk) { + Ok(children) => children, + Err(error) => return Some(Err(error)), + }; + // Pseudo-recursion + let new_iter = children.iter(); + let old_iter = std::mem::replace(&mut iter, new_iter); + stack.push((child_node, old_iter)); + } + // Found the end of a `children.iter()` iterator. 
+ if let Some((child_node, next_iter)) = stack.pop() { + // "Return" from pseudo-recursion by restoring state from the + // explicit stack + iter = next_iter; + + Some(Ok(child_node)) + } else { + // Reached the bottom of the stack, we’re done + None + } + }) + } + + fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow) { + if let Cow::Borrowed(path) = path { + *unreachable_bytes += path.len() as u32 + } + } + + pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) { + self.write_mode = write_mode; + } + + pub(crate) fn set_tracked_hint(&mut self, tracked_hint: bool) { + self.use_tracked_hint = tracked_hint; + } +} + +/// Sets the parameters for resetting a dirstate entry +pub struct DirstateEntryReset<'a> { + /// Which entry are we resetting + pub filename: &'a HgPath, + /// Whether the entry is tracked in the working copy + pub wc_tracked: bool, + /// Whether the entry is tracked in p1 + pub p1_tracked: bool, + /// Whether the entry has merge information + pub p2_info: bool, + /// Whether the entry's mtime should be trusted + pub has_meaningful_mtime: bool, + /// Information from the parent file data (from the manifest) + pub parent_file_data_opt: Option, + /// Set this to `true` if you are *certain* that there is no old entry for + /// this filename. Yield better performance in cases where we do a lot + /// of additions to the dirstate. 
+ pub from_empty: bool, +} + +type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32)); + +impl OwningDirstateMap { + pub fn clear(&mut self) { + self.with_dmap_mut(|map| { + map.root = Default::default(); + map.nodes_with_entry_count = 0; + map.nodes_with_copy_source_count = 0; + map.unreachable_bytes = map.on_disk.len() as u32; + }); + } + + pub fn set_tracked( + &mut self, + filename: &HgPath, + ) -> Result { + let old_entry_opt = self.get(filename)?; + self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt)) + } + + pub fn set_untracked( + &mut self, + filename: &HgPath, + ) -> Result { + let old_entry_opt = self.get(filename)?; + match old_entry_opt { + None => Ok(false), + Some(old_entry) => { + if !old_entry.tracked() { + // `DirstateMap::set_untracked` is not a noop if + // already not tracked as it will decrement the + // tracked counters while going down. + return Ok(true); + } + if old_entry.added() { + // Untracking an "added" entry will just result in a + // worthless entry (and other parts of the code will + // complain about it), just drop it entirely. + self.drop_entry_and_copy_source(filename)?; + return Ok(true); + } + if !old_entry.p2_info() { + self.copy_map_remove(filename)?; + } + + self.with_dmap_mut(|map| { + map.set_untracked(filename, old_entry)?; + Ok(true) + }) + } + } + } + + pub fn set_clean( + &mut self, + filename: &HgPath, + mode: u32, + size: u32, + mtime: TruncatedTimestamp, + ) -> Result<(), DirstateError> { + let old_entry = match self.get(filename)? 
{ + None => { + return Err( + DirstateMapError::PathNotFound(filename.into()).into() + ) + } + Some(e) => e, + }; + self.copy_map_remove(filename)?; + self.with_dmap_mut(|map| { + map.set_clean(filename, old_entry, mode, size, mtime) + }) + } + + pub fn set_possibly_dirty( + &mut self, + filename: &HgPath, + ) -> Result<(), DirstateError> { + if self.get(filename)?.is_none() { + return Err(DirstateMapError::PathNotFound(filename.into()).into()); + } + self.with_dmap_mut(|map| map.set_possibly_dirty(filename)) + } + + pub fn reset_state( + &mut self, + reset: DirstateEntryReset, + ) -> Result<(), DirstateError> { + if !(reset.p1_tracked || reset.p2_info || reset.wc_tracked) { + self.drop_entry_and_copy_source(reset.filename)?; + return Ok(()); + } + if !reset.from_empty { + self.copy_map_remove(reset.filename)?; + } + + let old_entry_opt = if reset.from_empty { + None + } else { + self.get(reset.filename)? + }; + + self.with_dmap_mut(|map| { + map.reset_state( + reset.filename, + old_entry_opt, + reset.wc_tracked, + reset.p1_tracked, + reset.p2_info, + reset.has_meaningful_mtime, + reset.parent_file_data_opt, + ) + }) + } + + pub fn drop_entry_and_copy_source( + &mut self, + filename: &HgPath, + ) -> Result<(), DirstateError> { + let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked()); + struct Dropped { + was_tracked: bool, + had_entry: bool, + had_copy_source: bool, + } + + /// If this returns `Ok(Some((dropped, removed)))`, then + /// + /// * `dropped` is about the leaf node that was at `filename` + /// * `removed` is whether this particular level of recursion just + /// removed a node in `nodes`. 
+ fn recur<'on_disk>( + on_disk: &'on_disk [u8], + unreachable_bytes: &mut u32, + nodes: &mut ChildNodes<'on_disk>, + path: &HgPath, + ) -> Result, DirstateV2ParseError> { + let (first_path_component, rest_of_path) = + path.split_first_component(); + let nodes = nodes.make_mut(on_disk, unreachable_bytes)?; + let node = if let Some(node) = nodes.get_mut(first_path_component) + { + node + } else { + return Ok(None); + }; + let dropped; + if let Some(rest) = rest_of_path { + if let Some((d, removed)) = recur( + on_disk, + unreachable_bytes, + &mut node.children, + rest, + )? { + dropped = d; + if dropped.had_entry { + node.descendants_with_entry_count = node + .descendants_with_entry_count + .checked_sub(1) + .expect( + "descendants_with_entry_count should be >= 0", + ); + } + if dropped.was_tracked { + node.tracked_descendants_count = node + .tracked_descendants_count + .checked_sub(1) + .expect( + "tracked_descendants_count should be >= 0", + ); + } + + // Directory caches must be invalidated when removing a + // child node + if removed { + if let NodeData::CachedDirectory { .. } = &node.data { + node.data = NodeData::None + } + } + } else { + return Ok(None); + } + } else { + let entry = node.data.as_entry(); + let was_tracked = entry.map_or(false, |entry| entry.tracked()); + let had_entry = entry.is_some(); + if had_entry { + node.data = NodeData::None + } + let mut had_copy_source = false; + if let Some(source) = &node.copy_source { + DirstateMap::count_dropped_path( + unreachable_bytes, + Cow::Borrowed(source), + ); + had_copy_source = true; + node.copy_source = None + } + dropped = Dropped { + was_tracked, + had_entry, + had_copy_source, + }; + } + // After recursion, for both leaf (rest_of_path is None) nodes and + // parent nodes, remove a node if it just became empty. 
+ let remove = !node.data.has_entry() + && node.copy_source.is_none() + && node.children.is_empty(); + if remove { + let (key, _) = + nodes.remove_entry(first_path_component).unwrap(); + DirstateMap::count_dropped_path( + unreachable_bytes, + Cow::Borrowed(key.full_path()), + ) + } + Ok(Some((dropped, remove))) + } + + self.with_dmap_mut(|map| { + if let Some((dropped, _removed)) = recur( + map.on_disk, + &mut map.unreachable_bytes, + &mut map.root, + filename, + )? { + if dropped.had_entry { + map.nodes_with_entry_count = map + .nodes_with_entry_count + .checked_sub(1) + .expect("nodes_with_entry_count should be >= 0"); + } + if dropped.had_copy_source { + map.nodes_with_copy_source_count = map + .nodes_with_copy_source_count + .checked_sub(1) + .expect("nodes_with_copy_source_count should be >= 0"); + } + } else { + debug_assert!(!was_tracked); + } + Ok(()) + }) + } + + pub fn has_tracked_dir( + &mut self, + directory: &HgPath, + ) -> Result { + self.with_dmap_mut(|map| { + if let Some(node) = map.get_node(directory)? { + // A node without a `DirstateEntry` was created to hold child + // nodes, and is therefore a directory. + let is_dir = node.entry()?.is_none(); + Ok(is_dir && node.tracked_descendants_count() > 0) + } else { + Ok(false) + } + }) + } + + pub fn has_dir( + &mut self, + directory: &HgPath, + ) -> Result { + self.with_dmap_mut(|map| { + if let Some(node) = map.get_node(directory)? { + // A node without a `DirstateEntry` was created to hold child + // nodes, and is therefore a directory. 
+ let is_dir = node.entry()?.is_none(); + Ok(is_dir && node.descendants_with_entry_count() > 0) + } else { + Ok(false) + } + }) + } + + #[logging_timer::time("trace")] + pub fn pack_v1( + &self, + parents: DirstateParents, + ) -> Result, DirstateError> { + let map = self.get_map(); + // Optizimation (to be measured?): pre-compute size to avoid `Vec` + // reallocations + let mut size = parents.as_bytes().len(); + for node in map.iter_nodes() { + let node = node?; + if node.entry()?.is_some() { + size += packed_entry_size( + node.full_path(map.on_disk)?, + node.copy_source(map.on_disk)?, + ); + } + } + + let mut packed = Vec::with_capacity(size); + packed.extend(parents.as_bytes()); + + for node in map.iter_nodes() { + let node = node?; + if let Some(entry) = node.entry()? { + pack_entry( + node.full_path(map.on_disk)?, + &entry, + node.copy_source(map.on_disk)?, + &mut packed, + ); + } + } + Ok(packed) + } + + /// Returns new data and metadata together with whether that data should be + /// appended to the existing data file whose content is at + /// `map.on_disk` (true), instead of written to a new data file + /// (false), and the previous size of data on disk. + #[logging_timer::time("trace")] + pub fn pack_v2( + &self, + write_mode: DirstateMapWriteMode, + ) -> Result<(Vec, on_disk::TreeMetadata, bool, usize), DirstateError> + { + let map = self.get_map(); + on_disk::write(map, write_mode) + } + + /// `callback` allows the caller to process and do something with the + /// results of the status. This is needed to do so efficiently (i.e. + /// without cloning the `DirstateStatus` object with its paths) because + /// we need to borrow from `Self`. 
+ pub fn with_status( + &mut self, + matcher: &(dyn Matcher + Sync), + root_dir: PathBuf, + ignore_files: Vec, + options: StatusOptions, + callback: impl for<'r> FnOnce( + Result<(DirstateStatus<'r>, Vec), StatusError>, + ) -> R, + ) -> R { + self.with_dmap_mut(|map| { + callback(super::status::status( + map, + matcher, + root_dir, + ignore_files, + options, + )) + }) + } + + pub fn copy_map_len(&self) -> usize { + let map = self.get_map(); + map.nodes_with_copy_source_count as usize + } + + pub fn copy_map_iter(&self) -> CopyMapIter<'_> { + let map = self.get_map(); + Box::new(filter_map_results(map.iter_nodes(), move |node| { + Ok(if let Some(source) = node.copy_source(map.on_disk)? { + Some((node.full_path(map.on_disk)?, source)) + } else { + None + }) + })) + } + + pub fn copy_map_contains_key( + &self, + key: &HgPath, + ) -> Result { + let map = self.get_map(); + Ok(if let Some(node) = map.get_node(key)? { + node.has_copy_source() + } else { + false + }) + } + + pub fn copy_map_get( + &self, + key: &HgPath, + ) -> Result, DirstateV2ParseError> { + let map = self.get_map(); + if let Some(node) = map.get_node(key)? { + if let Some(source) = node.copy_source(map.on_disk)? { + return Ok(Some(source)); + } + } + Ok(None) + } + + pub fn copy_map_remove( + &mut self, + key: &HgPath, + ) -> Result, DirstateV2ParseError> { + self.with_dmap_mut(|map| { + let count = &mut map.nodes_with_copy_source_count; + let unreachable_bytes = &mut map.unreachable_bytes; + Ok(DirstateMap::get_node_mut_inner( + map.on_disk, + unreachable_bytes, + &mut map.root, + key, + |_ancestor| {}, + )? 
+ .and_then(|node| { + if let Some(source) = &node.copy_source { + *count = count + .checked_sub(1) + .expect("nodes_with_copy_source_count should be >= 0"); + DirstateMap::count_dropped_path( + unreachable_bytes, + Cow::Borrowed(source), + ); + } + node.copy_source.take().map(Cow::into_owned) + })) + }) + } + + pub fn copy_map_insert( + &mut self, + key: &HgPath, + value: &HgPath, + ) -> Result, DirstateV2ParseError> { + self.with_dmap_mut(|map| { + let node = map.get_or_insert_node(key, |_ancestor| {})?; + let had_copy_source = node.copy_source.is_none(); + let old = node + .copy_source + .replace(value.to_owned().into()) + .map(Cow::into_owned); + if had_copy_source { + map.nodes_with_copy_source_count += 1 + } + Ok(old) + }) + } + + pub fn len(&self) -> usize { + let map = self.get_map(); + map.nodes_with_entry_count as usize + } + + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn contains_key( + &self, + key: &HgPath, + ) -> Result { + Ok(self.get(key)?.is_some()) + } + + pub fn get( + &self, + key: &HgPath, + ) -> Result, DirstateV2ParseError> { + let map = self.get_map(); + Ok(if let Some(node) = map.get_node(key)? { + node.entry()? + } else { + None + }) + } + + pub fn iter(&self) -> StateMapIter<'_> { + let map = self.get_map(); + Box::new(filter_map_results(map.iter_nodes(), move |node| { + Ok(if let Some(entry) = node.entry()? { + Some((node.full_path(map.on_disk)?, entry)) + } else { + None + }) + })) + } + + pub fn iter_tracked_dirs( + &mut self, + ) -> Result< + Box< + dyn Iterator> + + Send + + '_, + >, + DirstateError, + > { + let map = self.get_map(); + let on_disk = map.on_disk; + Ok(Box::new(filter_map_results( + map.iter_nodes(), + move |node| { + Ok(if node.tracked_descendants_count() > 0 { + Some(node.full_path(on_disk)?) + } else { + None + }) + }, + ))) + } + + /// Only public because it needs to be exposed to the Python layer. + /// It is not the full `setparents` logic, only the parts that mutate the + /// entries. 
+ pub fn setparents_fixup( + &mut self, + ) -> Result, DirstateV2ParseError> { + // XXX + // All the copying and re-querying is quite inefficient, but this is + // still a lot better than doing it from Python. + // + // The better solution is to develop a mechanism for `iter_mut`, + // which will be a lot more involved: we're dealing with a lazy, + // append-mostly, tree-like data structure. This will do for now. + let mut copies = vec![]; + let mut files_with_p2_info = vec![]; + for res in self.iter() { + let (path, entry) = res?; + if entry.p2_info() { + files_with_p2_info.push(path.to_owned()) + } + } + self.with_dmap_mut(|map| { + for path in files_with_p2_info.iter() { + let node = map.get_or_insert_node(path, |_| {})?; + let entry = + node.data.as_entry_mut().expect("entry should exist"); + entry.drop_merge_data(); + if let Some(source) = node.copy_source.take().as_deref() { + copies.push((path.to_owned(), source.to_owned())); + } + } + Ok(copies) + }) + } + + pub fn debug_iter( + &self, + all: bool, + ) -> Box< + dyn Iterator> + + Send + + '_, + > { + let map = self.get_map(); + Box::new(filter_map_results(map.iter_nodes(), move |node| { + let debug_tuple = if let Some(entry) = node.entry()? { + entry.debug_tuple() + } else if !all { + return Ok(None); + } else if let Some(mtime) = node.cached_directory_mtime()? { + (b' ', 0, -1, mtime.truncated_seconds() as i32) + } else { + (b' ', 0, -1, -1) + }; + Ok(Some((node.full_path(map.on_disk)?, debug_tuple))) + })) + } +} +#[cfg(test)] +mod tests { + use super::*; + + /// Shortcut to return tracked descendants of a path. + /// Panics if the path does not exist. + fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 { + let path = dbg!(HgPath::new(path)); + let node = map.get_map().get_node(path); + node.unwrap().unwrap().tracked_descendants_count() + } + + /// Shortcut to return descendants with an entry. + /// Panics if the path does not exist. 
+ fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 { + let path = dbg!(HgPath::new(path)); + let node = map.get_map().get_node(path); + node.unwrap().unwrap().descendants_with_entry_count() + } + + fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) { + let path = dbg!(HgPath::new(path)); + let node = map.get_map().get_node(path); + assert!(node.unwrap().is_none()); + } + + /// Shortcut for path creation in tests + fn p(b: &[u8]) -> &HgPath { + HgPath::new(b) + } + + /// Test the very simple case a single tracked file + #[test] + fn test_tracked_descendants_simple() -> Result<(), DirstateError> { + let mut map = OwningDirstateMap::new_empty(vec![], None); + assert_eq!(map.len(), 0); + + map.set_tracked(p(b"some/nested/path"))?; + + assert_eq!(map.len(), 1); + assert_eq!(tracked_descendants(&map, b"some"), 1); + assert_eq!(tracked_descendants(&map, b"some/nested"), 1); + assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0); + + map.set_untracked(p(b"some/nested/path"))?; + assert_eq!(map.len(), 0); + assert!(map.get_map().get_node(p(b"some"))?.is_none()); + + Ok(()) + } + + /// Test the simple case of all tracked, but multiple files + #[test] + fn test_tracked_descendants_multiple() -> Result<(), DirstateError> { + let mut map = OwningDirstateMap::new_empty(vec![], None); + + map.set_tracked(p(b"some/nested/path"))?; + map.set_tracked(p(b"some/nested/file"))?; + // one layer without any files to test deletion cascade + map.set_tracked(p(b"some/other/nested/path"))?; + map.set_tracked(p(b"root_file"))?; + map.set_tracked(p(b"some/file"))?; + map.set_tracked(p(b"some/file2"))?; + map.set_tracked(p(b"some/file3"))?; + + assert_eq!(map.len(), 7); + assert_eq!(tracked_descendants(&map, b"some"), 6); + assert_eq!(tracked_descendants(&map, b"some/nested"), 2); + assert_eq!(tracked_descendants(&map, b"some/other"), 1); + assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); + assert_eq!(tracked_descendants(&map, 
b"some/nested/path"), 0); + + map.set_untracked(p(b"some/nested/path"))?; + assert_eq!(map.len(), 6); + assert_eq!(tracked_descendants(&map, b"some"), 5); + assert_eq!(tracked_descendants(&map, b"some/nested"), 1); + assert_eq!(tracked_descendants(&map, b"some/other"), 1); + assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); + + map.set_untracked(p(b"some/nested/file"))?; + assert_eq!(map.len(), 5); + assert_eq!(tracked_descendants(&map, b"some"), 4); + assert_eq!(tracked_descendants(&map, b"some/other"), 1); + assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); + assert_does_not_exist(&map, b"some_nested"); + + map.set_untracked(p(b"some/other/nested/path"))?; + assert_eq!(map.len(), 4); + assert_eq!(tracked_descendants(&map, b"some"), 3); + assert_does_not_exist(&map, b"some/other"); + + map.set_untracked(p(b"root_file"))?; + assert_eq!(map.len(), 3); + assert_eq!(tracked_descendants(&map, b"some"), 3); + assert_does_not_exist(&map, b"root_file"); + + map.set_untracked(p(b"some/file"))?; + assert_eq!(map.len(), 2); + assert_eq!(tracked_descendants(&map, b"some"), 2); + assert_does_not_exist(&map, b"some/file"); + + map.set_untracked(p(b"some/file2"))?; + assert_eq!(map.len(), 1); + assert_eq!(tracked_descendants(&map, b"some"), 1); + assert_does_not_exist(&map, b"some/file2"); + + map.set_untracked(p(b"some/file3"))?; + assert_eq!(map.len(), 0); + assert_does_not_exist(&map, b"some/file3"); + + Ok(()) + } + + /// Check with a mix of tracked and non-tracked items + #[test] + fn test_tracked_descendants_different() -> Result<(), DirstateError> { + let mut map = OwningDirstateMap::new_empty(vec![], None); + + // A file that was just added + map.set_tracked(p(b"some/nested/path"))?; + // This has no information, the dirstate should ignore it + let reset = DirstateEntryReset { + filename: p(b"some/file"), + wc_tracked: false, + p1_tracked: false, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: 
false, + }; + map.reset_state(reset)?; + assert_does_not_exist(&map, b"some/file"); + + // A file that was removed + let reset = DirstateEntryReset { + filename: p(b"some/nested/file"), + wc_tracked: false, + p1_tracked: true, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked()); + // Only present in p2 + let reset = DirstateEntryReset { + filename: p(b"some/file3"), + wc_tracked: false, + p1_tracked: false, + p2_info: true, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + assert!(!map.get(p(b"some/file3"))?.unwrap().tracked()); + // A file that was merged + let reset = DirstateEntryReset { + filename: p(b"root_file"), + wc_tracked: true, + p1_tracked: true, + p2_info: true, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + assert!(map.get(p(b"root_file"))?.unwrap().tracked()); + // A file that is added, with info from p2 + // XXX is that actually possible? 
+ let reset = DirstateEntryReset { + filename: p(b"some/file2"), + wc_tracked: true, + p1_tracked: false, + p2_info: true, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + assert!(map.get(p(b"some/file2"))?.unwrap().tracked()); + // A clean file + // One layer without any files to test deletion cascade + let reset = DirstateEntryReset { + filename: p(b"some/other/nested/path"), + wc_tracked: true, + p1_tracked: true, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked()); + + assert_eq!(map.len(), 6); + assert_eq!(tracked_descendants(&map, b"some"), 3); + assert_eq!(descendants_with_an_entry(&map, b"some"), 5); + assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1); + assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0); + assert_eq!( + descendants_with_an_entry(&map, b"some/other/nested/path"), + 0 + ); + assert_eq!(tracked_descendants(&map, b"some/nested"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2); + + // might as well check this + map.set_untracked(p(b"path/does/not/exist"))?; + assert_eq!(map.len(), 6); + + map.set_untracked(p(b"some/other/nested/path"))?; + // It is set untracked but not deleted since it held other information + assert_eq!(map.len(), 6); + assert_eq!(tracked_descendants(&map, b"some"), 2); + assert_eq!(descendants_with_an_entry(&map, b"some"), 5); + assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1); + assert_eq!(tracked_descendants(&map, b"some/nested"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2); + + map.set_untracked(p(b"some/nested/path"))?; + // It is set untracked *and* deleted since it was only 
added + assert_eq!(map.len(), 5); + assert_eq!(tracked_descendants(&map, b"some"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some"), 4); + assert_eq!(tracked_descendants(&map, b"some/nested"), 0); + assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1); + assert_does_not_exist(&map, b"some/nested/path"); + + map.set_untracked(p(b"root_file"))?; + // Untracked but not deleted + assert_eq!(map.len(), 5); + assert!(map.get(p(b"root_file"))?.is_some()); + + map.set_untracked(p(b"some/file2"))?; + assert_eq!(map.len(), 5); + assert_eq!(tracked_descendants(&map, b"some"), 0); + assert!(map.get(p(b"some/file2"))?.is_some()); + + map.set_untracked(p(b"some/file3"))?; + assert_eq!(map.len(), 5); + assert_eq!(tracked_descendants(&map, b"some"), 0); + assert!(map.get(p(b"some/file3"))?.is_some()); + + Ok(()) + } + + /// Check that copies counter is correctly updated + #[test] + fn test_copy_source() -> Result<(), DirstateError> { + let mut map = OwningDirstateMap::new_empty(vec![], None); + + // Clean file + let reset = DirstateEntryReset { + filename: p(b"files/clean"), + wc_tracked: true, + p1_tracked: true, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + // Merged file + let reset = DirstateEntryReset { + filename: p(b"files/from_p2"), + wc_tracked: true, + p1_tracked: true, + p2_info: true, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + // Removed file + let reset = DirstateEntryReset { + filename: p(b"removed"), + wc_tracked: false, + p1_tracked: true, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + // Added file + let reset = DirstateEntryReset { + filename: p(b"files/added"), + wc_tracked: true, + p1_tracked: false, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, 
+ }; + map.reset_state(reset)?; + // Add copy + map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?; + assert_eq!(map.copy_map_len(), 1); + + // Copy override + map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?; + assert_eq!(map.copy_map_len(), 1); + + // Multiple copies + map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?; + assert_eq!(map.copy_map_len(), 2); + + map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?; + assert_eq!(map.copy_map_len(), 3); + + // Added, so the entry is completely removed + map.set_untracked(p(b"files/added"))?; + assert_does_not_exist(&map, b"files/added"); + assert_eq!(map.copy_map_len(), 2); + + // Removed, so the entry is kept around, so is its copy + map.set_untracked(p(b"removed"))?; + assert!(map.get(p(b"removed"))?.is_some()); + assert_eq!(map.copy_map_len(), 2); + + // Clean, so the entry is kept around, but not its copy + map.set_untracked(p(b"files/clean"))?; + assert!(map.get(p(b"files/clean"))?.is_some()); + assert_eq!(map.copy_map_len(), 1); + + map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?; + assert_eq!(map.copy_map_len(), 2); + + // Info from p2, so its copy source info is kept around + map.set_untracked(p(b"files/from_p2"))?; + assert!(map.get(p(b"files/from_p2"))?.is_some()); + assert_eq!(map.copy_map_len(), 2); + + Ok(()) + } + + /// Test with "on disk" data. For the sake of this test, the "on disk" data + /// does not actually come from the disk, but it's opaque to the code being + /// tested. 
+ #[test] + fn test_on_disk() -> Result<(), DirstateError> { + // First let's create some data to put "on disk" + let mut map = OwningDirstateMap::new_empty(vec![], None); + + // A file that was just added + map.set_tracked(p(b"some/nested/added"))?; + map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?; + + // A file that was removed + let reset = DirstateEntryReset { + filename: p(b"some/nested/removed"), + wc_tracked: false, + p1_tracked: true, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + // Only present in p2 + let reset = DirstateEntryReset { + filename: p(b"other/p2_info_only"), + wc_tracked: false, + p1_tracked: false, + p2_info: true, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + map.copy_map_insert( + p(b"other/p2_info_only"), + p(b"other/p2_info_copy_source"), + )?; + // A file that was merged + let reset = DirstateEntryReset { + filename: p(b"merged"), + wc_tracked: true, + p1_tracked: true, + p2_info: true, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + // A file that is added, with info from p2 + // XXX is that actually possible? 
+ let reset = DirstateEntryReset { + filename: p(b"other/added_with_p2"), + wc_tracked: true, + p1_tracked: false, + p2_info: true, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + // One layer without any files to test deletion cascade + // A clean file + let reset = DirstateEntryReset { + filename: p(b"some/other/nested/clean"), + wc_tracked: true, + p1_tracked: true, + p2_info: false, + has_meaningful_mtime: false, + parent_file_data_opt: None, + from_empty: false, + }; + map.reset_state(reset)?; + + let (packed, metadata, _should_append, _old_data_size) = + map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?; + let packed_len = packed.len(); + assert!(packed_len > 0); + + // Recreate "from disk" + let mut map = OwningDirstateMap::new_v2( + packed, + packed_len, + metadata.as_bytes(), + vec![], + None, + )?; + + // Check that everything is accounted for + assert!(map.contains_key(p(b"some/nested/added"))?); + assert!(map.contains_key(p(b"some/nested/removed"))?); + assert!(map.contains_key(p(b"merged"))?); + assert!(map.contains_key(p(b"other/p2_info_only"))?); + assert!(map.contains_key(p(b"other/added_with_p2"))?); + assert!(map.contains_key(p(b"some/other/nested/clean"))?); + assert_eq!( + map.copy_map_get(p(b"some/nested/added"))?, + Some(p(b"added_copy_source")) + ); + assert_eq!( + map.copy_map_get(p(b"other/p2_info_only"))?, + Some(p(b"other/p2_info_copy_source")) + ); + assert_eq!(tracked_descendants(&map, b"some"), 2); + assert_eq!(descendants_with_an_entry(&map, b"some"), 3); + assert_eq!(tracked_descendants(&map, b"other"), 1); + assert_eq!(descendants_with_an_entry(&map, b"other"), 2); + assert_eq!(tracked_descendants(&map, b"some/other"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1); + assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1); + assert_eq!(tracked_descendants(&map, 
b"some/nested"), 1); + assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2); + assert_eq!(map.len(), 6); + assert_eq!(map.get_map().unreachable_bytes, 0); + assert_eq!(map.copy_map_len(), 2); + + // Shouldn't change anything since it's already not tracked + map.set_untracked(p(b"some/nested/removed"))?; + assert_eq!(map.get_map().unreachable_bytes, 0); + + if let ChildNodes::InMemory(_) = map.get_map().root { + panic!("root should not have been mutated") + } + // We haven't mutated enough (nothing, actually), we should still be in + // the append strategy + assert!(map.get_map().write_should_append()); + + // But this mutates the structure, so there should be unreachable_bytes + assert!(map.set_untracked(p(b"some/nested/added"))?); + let unreachable_bytes = map.get_map().unreachable_bytes; + assert!(unreachable_bytes > 0); + + if let ChildNodes::OnDisk(_) = map.get_map().root { + panic!("root should have been mutated") + } + + // This should not mutate the structure either, since `root` has + // already been mutated along with its direct children. 
+ map.set_untracked(p(b"merged"))?; + assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes); + + if let NodeRef::InMemory(_, _) = + map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() + { + panic!("'other/added_with_p2' should not have been mutated") + } + // But this should, since it's in a different path + // than `some/nested/add` + map.set_untracked(p(b"other/added_with_p2"))?; + assert!(map.get_map().unreachable_bytes > unreachable_bytes); + + if let NodeRef::OnDisk(_) = + map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() + { + panic!("'other/added_with_p2' should have been mutated") + } + + // We have rewritten most of the tree, we should create a new file + assert!(!map.get_map().write_should_append()); + + Ok(()) + } +} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate/entry.rs --- a/rust/hg-core/src/dirstate/entry.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-core/src/dirstate/entry.rs Mon Nov 04 11:00:58 2024 +0100 @@ -1,4 +1,4 @@ -use crate::dirstate_tree::on_disk::DirstateV2ParseError; +use crate::dirstate::on_disk::DirstateV2ParseError; use crate::errors::HgError; use bitflags::bitflags; use std::fs; diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate/on_disk.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/dirstate/on_disk.rs Mon Nov 04 11:00:58 2024 +0100 @@ -0,0 +1,940 @@ +//! The "version 2" disk representation of the dirstate +//! +//! 
See `mercurial/helptext/internals/dirstate-v2.txt` + +use crate::dirstate::dirstate_map::DirstateVersion; +use crate::dirstate::dirstate_map::{ + self, DirstateMap, DirstateMapWriteMode, NodeRef, +}; +use crate::dirstate::path_with_basename::WithBasename; +use crate::dirstate::{DirstateV2Data, TruncatedTimestamp}; +use crate::errors::{HgError, IoResultExt}; +use crate::repo::Repo; +use crate::requirements::DIRSTATE_TRACKED_HINT_V1; +use crate::utils::hg_path::HgPath; +use crate::DirstateEntry; +use crate::DirstateError; +use crate::DirstateParents; +use bitflags::bitflags; +use bytes_cast::unaligned::{U16Be, U32Be}; +use bytes_cast::BytesCast; +use format_bytes::format_bytes; +use rand::Rng; +use std::borrow::Cow; +use std::fmt::Write; +use uuid::Uuid; + +use super::dirstate_map::DirstateIdentity; + +/// Added at the start of `.hg/dirstate` when the "v2" format is used. +/// This a redundant sanity check more than an actual "magic number" since +/// `.hg/requires` already governs which format should be used. 
+pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; + +/// Keep space for 256-bit hashes +const STORED_NODE_ID_BYTES: usize = 32; + +/// … even though only 160 bits are used for now, with SHA-1 +const USED_NODE_ID_BYTES: usize = 20; + +pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; +pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; + +/// Must match constants of the same names in `mercurial/dirstateutils/v2.py` +const TREE_METADATA_SIZE: usize = 44; +const NODE_SIZE: usize = 44; + +/// Make sure that size-affecting changes are made knowingly +#[allow(unused)] +fn static_assert_size_of() { + let _ = std::mem::transmute::; + let _ = std::mem::transmute::; + let _ = std::mem::transmute::; +} + +// Must match `HEADER` in `mercurial/dirstateutils/docket.py` +#[derive(BytesCast)] +#[repr(C)] +struct DocketHeader { + marker: [u8; V2_FORMAT_MARKER.len()], + parent_1: [u8; STORED_NODE_ID_BYTES], + parent_2: [u8; STORED_NODE_ID_BYTES], + + metadata: TreeMetadata, + + /// Counted in bytes + data_size: Size, + + uuid_size: u8, +} + +pub struct Docket<'on_disk> { + header: &'on_disk DocketHeader, + pub uuid: &'on_disk [u8], +} + +/// Fields are documented in the *Tree metadata in the docket file* +/// section of `mercurial/helptext/internals/dirstate-v2.txt` +#[derive(BytesCast)] +#[repr(C)] +pub struct TreeMetadata { + root_nodes: ChildNodes, + nodes_with_entry_count: Size, + nodes_with_copy_source_count: Size, + unreachable_bytes: Size, + unused: [u8; 4], + + /// See *Optional hash of ignore patterns* section of + /// `mercurial/helptext/internals/dirstate-v2.txt` + ignore_patterns_hash: IgnorePatternsHash, +} + +/// Fields are documented in the *The data file format* +/// section of `mercurial/helptext/internals/dirstate-v2.txt` +#[derive(BytesCast, Debug)] +#[repr(C)] +pub(super) struct Node { + full_path: PathSlice, + + /// In bytes from `self.full_path.start` + base_name_start: PathSize, + + copy_source: OptPathSlice, + children: 
ChildNodes, + pub(super) descendants_with_entry_count: Size, + pub(super) tracked_descendants_count: Size, + flags: U16Be, + size: U32Be, + mtime: PackedTruncatedTimestamp, +} + +bitflags! { + #[repr(C)] + struct Flags: u16 { + const WDIR_TRACKED = 1 << 0; + const P1_TRACKED = 1 << 1; + const P2_INFO = 1 << 2; + const MODE_EXEC_PERM = 1 << 3; + const MODE_IS_SYMLINK = 1 << 4; + const HAS_FALLBACK_EXEC = 1 << 5; + const FALLBACK_EXEC = 1 << 6; + const HAS_FALLBACK_SYMLINK = 1 << 7; + const FALLBACK_SYMLINK = 1 << 8; + const EXPECTED_STATE_IS_MODIFIED = 1 << 9; + const HAS_MODE_AND_SIZE = 1 <<10; + const HAS_MTIME = 1 <<11; + const MTIME_SECOND_AMBIGUOUS = 1 << 12; + const DIRECTORY = 1 <<13; + const ALL_UNKNOWN_RECORDED = 1 <<14; + const ALL_IGNORED_RECORDED = 1 <<15; + } +} + +/// Duration since the Unix epoch +#[derive(BytesCast, Copy, Clone, Debug)] +#[repr(C)] +struct PackedTruncatedTimestamp { + truncated_seconds: U32Be, + nanoseconds: U32Be, +} + +/// Counted in bytes from the start of the file +/// +/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. +type Offset = U32Be; + +/// Counted in number of items +/// +/// NOTE: we choose not to support counting more than 4 billion nodes anywhere. +type Size = U32Be; + +/// Counted in bytes +/// +/// NOTE: we choose not to support file names/paths longer than 64 KiB. +type PathSize = U16Be; + +/// A contiguous sequence of `len` times `Node`, representing the child nodes +/// of either some other node or of the repository root. +/// +/// Always sorted by ascending `full_path`, to allow binary search. +/// Since nodes with the same parent nodes also have the same parent path, +/// only the `base_name`s need to be compared during binary search. 
+#[derive(BytesCast, Copy, Clone, Debug)] +#[repr(C)] +struct ChildNodes { + start: Offset, + len: Size, +} + +/// A `HgPath` of `len` bytes +#[derive(BytesCast, Copy, Clone, Debug)] +#[repr(C)] +struct PathSlice { + start: Offset, + len: PathSize, +} + +/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes +type OptPathSlice = PathSlice; + +/// Unexpected file format found in `.hg/dirstate` with the "v2" format. +/// +/// This should only happen if Mercurial is buggy or a repository is corrupted. +#[derive(Debug)] +pub struct DirstateV2ParseError { + message: String, +} + +impl DirstateV2ParseError { + pub fn new>(message: S) -> Self { + Self { + message: message.into(), + } + } +} + +impl From for HgError { + fn from(e: DirstateV2ParseError) -> Self { + HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message)) + } +} + +impl From for crate::DirstateError { + fn from(error: DirstateV2ParseError) -> Self { + HgError::from(error).into() + } +} + +impl TreeMetadata { + pub fn as_bytes(&self) -> &[u8] { + BytesCast::as_bytes(self) + } +} + +impl<'on_disk> Docket<'on_disk> { + /// Generate the identifier for a new data file + /// + /// TODO: support the `HGTEST_UUIDFILE` environment variable. + /// See `mercurial/revlogutils/docket.py` + pub fn new_uid() -> String { + const ID_LENGTH: usize = 8; + let mut id = String::with_capacity(ID_LENGTH); + let mut rng = rand::thread_rng(); + for _ in 0..ID_LENGTH { + // One random hexadecimal digit. + // `unwrap` never panics because `impl Write for String` + // never returns an error. 
+ write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap(); + } + id + } + + pub fn serialize( + parents: DirstateParents, + tree_metadata: TreeMetadata, + data_size: u64, + uuid: &[u8], + ) -> Result, std::num::TryFromIntError> { + let header = DocketHeader { + marker: *V2_FORMAT_MARKER, + parent_1: parents.p1.pad_to_256_bits(), + parent_2: parents.p2.pad_to_256_bits(), + metadata: tree_metadata, + data_size: u32::try_from(data_size)?.into(), + uuid_size: uuid.len().try_into()?, + }; + let header = header.as_bytes(); + let mut docket = Vec::with_capacity(header.len() + uuid.len()); + docket.extend_from_slice(header); + docket.extend_from_slice(uuid); + Ok(docket) + } + + pub fn parents(&self) -> DirstateParents { + use crate::Node; + let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES]) + .unwrap(); + let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) + .unwrap(); + DirstateParents { p1, p2 } + } + + pub fn tree_metadata(&self) -> &[u8] { + self.header.metadata.as_bytes() + } + + pub fn data_size(&self) -> usize { + // This `unwrap` could only panic on a 16-bit CPU + self.header.data_size.get().try_into().unwrap() + } + + pub fn data_filename(&self) -> String { + String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap() + } +} + +pub fn read_docket( + on_disk: &[u8], +) -> Result, DirstateV2ParseError> { + let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| { + DirstateV2ParseError::new(format!("when reading docket, {}", e)) + })?; + let uuid_size = header.uuid_size as usize; + if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { + Ok(Docket { header, uuid }) + } else { + Err(DirstateV2ParseError::new( + "invalid format marker or uuid size", + )) + } +} + +pub(super) fn read<'on_disk>( + on_disk: &'on_disk [u8], + metadata: &[u8], + uuid: Vec, + identity: Option, +) -> Result, DirstateV2ParseError> { + if on_disk.is_empty() { + let mut map = DirstateMap::empty(on_disk); + map.identity = 
identity; + map.old_uuid = Some(uuid); + map.dirstate_version = DirstateVersion::V2; + return Ok(map); + } + let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| { + DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e)) + })?; + let dirstate_map = DirstateMap { + on_disk, + root: dirstate_map::ChildNodes::OnDisk( + read_nodes(on_disk, meta.root_nodes).map_err(|mut e| { + e.message = format!("{}, when reading root notes", e.message); + e + })?, + ), + nodes_with_entry_count: meta.nodes_with_entry_count.get(), + nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(), + ignore_patterns_hash: meta.ignore_patterns_hash, + unreachable_bytes: meta.unreachable_bytes.get(), + old_data_size: on_disk.len(), + old_uuid: Some(uuid), + identity, + dirstate_version: DirstateVersion::V2, + write_mode: DirstateMapWriteMode::Auto, + use_tracked_hint: false, + }; + Ok(dirstate_map) +} + +impl Node { + pub(super) fn full_path<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { + read_hg_path(on_disk, self.full_path) + } + + pub(super) fn base_name_start( + &self, + ) -> Result { + let start = self.base_name_start.get(); + if start < self.full_path.len.get() { + let start = usize::from(start); + Ok(start) + } else { + Err(DirstateV2ParseError::new("not enough bytes for base name")) + } + } + + pub(super) fn base_name<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { + let full_path = self.full_path(on_disk)?; + let base_name_start = self.base_name_start()?; + Ok(HgPath::new(&full_path.as_bytes()[base_name_start..])) + } + + pub(super) fn path<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result, DirstateV2ParseError> { + Ok(WithBasename::from_raw_parts( + Cow::Borrowed(self.full_path(on_disk)?), + self.base_name_start()?, + )) + } + + pub(super) fn has_copy_source(&self) -> bool { + self.copy_source.start.get() != 0 + } + + 
pub(super) fn copy_source<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result, DirstateV2ParseError> { + Ok(if self.has_copy_source() { + Some(read_hg_path(on_disk, self.copy_source)?) + } else { + None + }) + } + + fn flags(&self) -> Flags { + Flags::from_bits_truncate(self.flags.get()) + } + + fn has_entry(&self) -> bool { + self.flags().intersects( + Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO, + ) + } + + pub(super) fn node_data( + &self, + ) -> Result { + if self.has_entry() { + Ok(dirstate_map::NodeData::Entry(self.assume_entry()?)) + } else if let Some(mtime) = self.cached_directory_mtime()? { + Ok(dirstate_map::NodeData::CachedDirectory { mtime }) + } else { + Ok(dirstate_map::NodeData::None) + } + } + + pub(super) fn cached_directory_mtime( + &self, + ) -> Result, DirstateV2ParseError> { + // For now we do not have code to handle the absence of + // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is + // unset. + if self.flags().contains(Flags::DIRECTORY) + && self.flags().contains(Flags::HAS_MTIME) + && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED) + { + Ok(Some(self.mtime()?)) + } else { + Ok(None) + } + } + + fn synthesize_unix_mode(&self) -> u32 { + // Some platforms' libc don't have the same type (MacOS uses i32 here) + #[allow(clippy::unnecessary_cast)] + let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) { + libc::S_IFLNK as u32 + } else { + libc::S_IFREG as u32 + }; + let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) { + 0o755 + } else { + 0o644 + }; + file_type | permissions + } + + fn mtime(&self) -> Result { + let mut m: TruncatedTimestamp = self.mtime.try_into()?; + if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) { + m.second_ambiguous = true; + } + Ok(m) + } + + fn assume_entry(&self) -> Result { + // TODO: convert through raw bits instead? 
+ let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED); + let p1_tracked = self.flags().contains(Flags::P1_TRACKED); + let p2_info = self.flags().contains(Flags::P2_INFO); + let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) + && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) + { + Some((self.synthesize_unix_mode(), self.size.into())) + } else { + None + }; + let mtime = if self.flags().contains(Flags::HAS_MTIME) + && !self.flags().contains(Flags::DIRECTORY) + && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) + { + Some(self.mtime()?) + } else { + None + }; + let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC) + { + Some(self.flags().contains(Flags::FALLBACK_EXEC)) + } else { + None + }; + let fallback_symlink = + if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) { + Some(self.flags().contains(Flags::FALLBACK_SYMLINK)) + } else { + None + }; + Ok(DirstateEntry::from_v2_data(DirstateV2Data { + wc_tracked, + p1_tracked, + p2_info, + mode_size, + mtime, + fallback_exec, + fallback_symlink, + })) + } + + pub(super) fn entry( + &self, + ) -> Result, DirstateV2ParseError> { + if self.has_entry() { + Ok(Some(self.assume_entry()?)) + } else { + Ok(None) + } + } + + pub(super) fn children<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result<&'on_disk [Node], DirstateV2ParseError> { + read_nodes(on_disk, self.children) + } + + pub(super) fn to_in_memory_node<'on_disk>( + &self, + on_disk: &'on_disk [u8], + ) -> Result, DirstateV2ParseError> { + Ok(dirstate_map::Node { + children: dirstate_map::ChildNodes::OnDisk( + self.children(on_disk)?, + ), + copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), + data: self.node_data()?, + descendants_with_entry_count: self + .descendants_with_entry_count + .get(), + tracked_descendants_count: self.tracked_descendants_count.get(), + }) + } + + fn from_dirstate_entry( + entry: &DirstateEntry, + ) -> (Flags, U32Be, PackedTruncatedTimestamp) { + let 
DirstateV2Data { + wc_tracked, + p1_tracked, + p2_info, + mode_size: mode_size_opt, + mtime: mtime_opt, + fallback_exec, + fallback_symlink, + } = entry.v2_data(); + // TODO: convert through raw flag bits instead? + let mut flags = Flags::empty(); + flags.set(Flags::WDIR_TRACKED, wc_tracked); + flags.set(Flags::P1_TRACKED, p1_tracked); + flags.set(Flags::P2_INFO, p2_info); + // Some platforms' libc don't have the same type (MacOS uses i32 here) + #[allow(clippy::unnecessary_cast)] + let size = if let Some((m, s)) = mode_size_opt { + let exec_perm = m & (libc::S_IXUSR as u32) != 0; + let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32; + flags.set(Flags::MODE_EXEC_PERM, exec_perm); + flags.set(Flags::MODE_IS_SYMLINK, is_symlink); + flags.insert(Flags::HAS_MODE_AND_SIZE); + s.into() + } else { + 0.into() + }; + let mtime = if let Some(m) = mtime_opt { + flags.insert(Flags::HAS_MTIME); + if m.second_ambiguous { + flags.insert(Flags::MTIME_SECOND_AMBIGUOUS); + }; + m.into() + } else { + PackedTruncatedTimestamp::null() + }; + if let Some(f_exec) = fallback_exec { + flags.insert(Flags::HAS_FALLBACK_EXEC); + if f_exec { + flags.insert(Flags::FALLBACK_EXEC); + } + } + if let Some(f_symlink) = fallback_symlink { + flags.insert(Flags::HAS_FALLBACK_SYMLINK); + if f_symlink { + flags.insert(Flags::FALLBACK_SYMLINK); + } + } + (flags, size, mtime) + } +} + +fn read_hg_path( + on_disk: &[u8], + slice: PathSlice, +) -> Result<&HgPath, DirstateV2ParseError> { + read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) +} + +fn read_nodes( + on_disk: &[u8], + slice: ChildNodes, +) -> Result<&[Node], DirstateV2ParseError> { + read_slice(on_disk, slice.start, slice.len.get()) +} + +fn read_slice( + on_disk: &[u8], + start: Offset, + len: Len, +) -> Result<&[T], DirstateV2ParseError> +where + T: BytesCast, + Len: TryInto, +{ + // Either `usize::MAX` would result in "out of bounds" error since a single + // `&[u8]` cannot occupy the entire addess space. 
+ let start = start.get().try_into().unwrap_or(usize::MAX); + let len = len.try_into().unwrap_or(usize::MAX); + let bytes = match on_disk.get(start..) { + Some(bytes) => bytes, + None => { + return Err(DirstateV2ParseError::new( + "not enough bytes from disk", + )) + } + }; + T::slice_from_bytes(bytes, len) + .map_err(|e| { + DirstateV2ParseError::new(format!("when reading a slice, {}", e)) + }) + .map(|(slice, _rest)| slice) +} + +/// Returns new data and metadata, together with whether that data should be +/// appended to the existing data file whose content is at +/// `dirstate_map.on_disk` (true), instead of written to a new data file +/// (false), and the previous size of data on disk. +pub(super) fn write( + dirstate_map: &DirstateMap, + write_mode: DirstateMapWriteMode, +) -> Result<(Vec, TreeMetadata, bool, usize), DirstateError> { + let append = match write_mode { + DirstateMapWriteMode::Auto => dirstate_map.write_should_append(), + DirstateMapWriteMode::ForceNewDataFile => false, + DirstateMapWriteMode::ForceAppend => true, + }; + if append { + log::trace!("appending to the dirstate data file"); + } else { + log::trace!("creating new dirstate data file"); + } + + // This ignores the space for paths, and for nodes without an entry. + // TODO: better estimate? Skip the `Vec` and write to a file directly? 
+ let size_guess = std::mem::size_of::() + * dirstate_map.nodes_with_entry_count as usize; + + let mut writer = Writer { + dirstate_map, + append, + out: Vec::with_capacity(size_guess), + }; + + let root_nodes = dirstate_map.root.as_ref(); + for node in root_nodes.iter() { + // Catch some corruptions before we write to disk + let full_path = node.full_path(dirstate_map.on_disk)?; + let base_name = node.base_name(dirstate_map.on_disk)?; + if full_path != base_name { + let explanation = format!( + "Dirstate root node '{}' is not at the root", + full_path + ); + return Err(HgError::corrupted(explanation).into()); + } + } + let root_nodes = writer.write_nodes(root_nodes)?; + + let unreachable_bytes = if append { + dirstate_map.unreachable_bytes + } else { + 0 + }; + let meta = TreeMetadata { + root_nodes, + nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), + nodes_with_copy_source_count: dirstate_map + .nodes_with_copy_source_count + .into(), + unreachable_bytes: unreachable_bytes.into(), + unused: [0; 4], + ignore_patterns_hash: dirstate_map.ignore_patterns_hash, + }; + Ok((writer.out, meta, append, dirstate_map.old_data_size)) +} + +struct Writer<'dmap, 'on_disk> { + dirstate_map: &'dmap DirstateMap<'on_disk>, + append: bool, + out: Vec, +} + +impl Writer<'_, '_> { + fn write_nodes( + &mut self, + nodes: dirstate_map::ChildNodesRef, + ) -> Result { + // Reuse already-written nodes if possible + if self.append { + if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes { + let start = self.on_disk_offset_of(nodes_slice).expect( + "dirstate-v2 OnDisk nodes not found within on_disk", + ); + let len = child_nodes_len_from_usize(nodes_slice.len()); + return Ok(ChildNodes { start, len }); + } + } + + // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has + // undefined iteration order. Sort to enable binary search in the + // written file. 
+ let nodes = nodes.sorted(); + let nodes_len = nodes.len(); + + // First accumulate serialized nodes in a `Vec` + let mut on_disk_nodes = Vec::with_capacity(nodes_len); + for node in nodes { + let children = node.children(self.dirstate_map.on_disk)?; + let full_path = node.full_path(self.dirstate_map.on_disk)?; + self.check_children(&children, full_path)?; + + let children = self.write_nodes(children)?; + let full_path = self.write_path(full_path.as_bytes()); + let copy_source = if let Some(source) = + node.copy_source(self.dirstate_map.on_disk)? + { + self.write_path(source.as_bytes()) + } else { + PathSlice { + start: 0.into(), + len: 0.into(), + } + }; + on_disk_nodes.push(match node { + NodeRef::InMemory(path, node) => { + let (flags, size, mtime) = match &node.data { + dirstate_map::NodeData::Entry(entry) => { + Node::from_dirstate_entry(entry) + } + dirstate_map::NodeData::CachedDirectory { mtime } => { + // we currently never set a mtime if unknown file + // are present. + // So if we have a mtime for a directory, we know + // they are no unknown + // files and we + // blindly set ALL_UNKNOWN_RECORDED. + // + // We never set ALL_IGNORED_RECORDED since we + // don't track that case + // currently. 
+ let mut flags = Flags::DIRECTORY + | Flags::HAS_MTIME + | Flags::ALL_UNKNOWN_RECORDED; + if mtime.second_ambiguous { + flags.insert(Flags::MTIME_SECOND_AMBIGUOUS) + } + (flags, 0.into(), (*mtime).into()) + } + dirstate_map::NodeData::None => ( + Flags::DIRECTORY, + 0.into(), + PackedTruncatedTimestamp::null(), + ), + }; + Node { + children, + copy_source, + full_path, + base_name_start: u16::try_from(path.base_name_start()) + // Could only panic for paths over 64 KiB + .expect("dirstate-v2 path length overflow") + .into(), + descendants_with_entry_count: node + .descendants_with_entry_count + .into(), + tracked_descendants_count: node + .tracked_descendants_count + .into(), + flags: flags.bits().into(), + size, + mtime, + } + } + NodeRef::OnDisk(node) => Node { + children, + copy_source, + full_path, + ..*node + }, + }) + } + // … so we can write them contiguously, after writing everything else + // they refer to. + let start = self.current_offset(); + let len = child_nodes_len_from_usize(nodes_len); + self.out.extend(on_disk_nodes.as_bytes()); + Ok(ChildNodes { start, len }) + } + + /// Catch some dirstate corruptions before writing them to disk + fn check_children( + &mut self, + children: &dirstate_map::ChildNodesRef, + full_path: &HgPath, + ) -> Result<(), DirstateError> { + for child in children.iter() { + let child_full_path = + child.full_path(self.dirstate_map.on_disk)?; + + let prefix_length = child_full_path.len() + // remove the filename + - child.base_name(self.dirstate_map.on_disk)?.len() + // remove the slash + - 1; + + let child_prefix = &child_full_path.as_bytes()[..prefix_length]; + + if child_prefix != full_path.as_bytes() { + let explanation = format!( + "dirstate child node's path '{}' \ + does not start with its parent's path '{}'", + child_full_path, full_path, + ); + + return Err(HgError::corrupted(explanation).into()); + } + } + Ok(()) + } + + /// If the given slice of items is within `on_disk`, returns its offset + /// from the start of 
`on_disk`. + fn on_disk_offset_of(&self, slice: &[T]) -> Option + where + T: BytesCast, + { + fn address_range(slice: &[u8]) -> std::ops::RangeInclusive { + let start = slice.as_ptr() as usize; + let end = start + slice.len(); + start..=end + } + let slice_addresses = address_range(slice.as_bytes()); + let on_disk_addresses = address_range(self.dirstate_map.on_disk); + if on_disk_addresses.contains(slice_addresses.start()) + && on_disk_addresses.contains(slice_addresses.end()) + { + let offset = slice_addresses.start() - on_disk_addresses.start(); + Some(offset_from_usize(offset)) + } else { + None + } + } + + fn current_offset(&mut self) -> Offset { + let mut offset = self.out.len(); + if self.append { + offset += self.dirstate_map.on_disk.len() + } + offset_from_usize(offset) + } + + fn write_path(&mut self, slice: &[u8]) -> PathSlice { + let len = path_len_from_usize(slice.len()); + // Reuse an already-written path if possible + if self.append { + if let Some(start) = self.on_disk_offset_of(slice) { + return PathSlice { start, len }; + } + } + let start = self.current_offset(); + self.out.extend(slice.as_bytes()); + PathSlice { start, len } + } +} + +fn offset_from_usize(x: usize) -> Offset { + u32::try_from(x) + // Could only panic for a dirstate file larger than 4 GiB + .expect("dirstate-v2 offset overflow") + .into() +} + +fn child_nodes_len_from_usize(x: usize) -> Size { + u32::try_from(x) + // Could only panic with over 4 billion nodes + .expect("dirstate-v2 slice length overflow") + .into() +} + +fn path_len_from_usize(x: usize) -> PathSize { + u16::try_from(x) + // Could only panic for paths over 64 KiB + .expect("dirstate-v2 path length overflow") + .into() +} + +impl From for PackedTruncatedTimestamp { + fn from(timestamp: TruncatedTimestamp) -> Self { + Self { + truncated_seconds: timestamp.truncated_seconds().into(), + nanoseconds: timestamp.nanoseconds().into(), + } + } +} + +impl TryFrom for TruncatedTimestamp { + type Error = DirstateV2ParseError; 
+ + fn try_from( + timestamp: PackedTruncatedTimestamp, + ) -> Result { + Self::from_already_truncated( + timestamp.truncated_seconds.get(), + timestamp.nanoseconds.get(), + false, + ) + } +} +impl PackedTruncatedTimestamp { + fn null() -> Self { + Self { + truncated_seconds: 0.into(), + nanoseconds: 0.into(), + } + } +} + +/// Write a new tracked key to disk. +/// See `format.use-dirstate-tracked-hint` config help for more details. +pub fn write_tracked_key(repo: &Repo) -> Result<(), HgError> { + // TODO move this to the dirstate itself once it grows a `dirty` flag and + // can reason about which context it needs to write this in. + // For now, only this fast-path needs to think about the tracked hint. + // Use [`crate::dirstate::dirstate_map::DirstateMap:: + // use_tracked_hint`] instead of looking at the requirements once + // refactored. + if !repo.requirements().contains(DIRSTATE_TRACKED_HINT_V1) { + return Ok(()); + } + // TODO use `hg_vfs` once the `InnerRevlog` is in. + let path = repo + .working_directory_path() + .join(".hg/dirstate-tracked-hint"); + std::fs::write(&path, Uuid::new_v4().as_bytes()).when_writing_file(&path) +} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate/owning.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/dirstate/owning.rs Mon Nov 04 11:00:58 2024 +0100 @@ -0,0 +1,98 @@ +use crate::{DirstateError, DirstateParents}; + +use super::dirstate_map::{DirstateIdentity, DirstateMap}; +use self_cell::self_cell; +use std::ops::Deref; + +self_cell!( + /// Keep a `DirstateMap<'owner>` next to the `owner` buffer that it + /// borrows. 
+ pub struct OwningDirstateMap { + owner: Box + Send>, + #[covariant] + dependent: DirstateMap, + } +); + +impl OwningDirstateMap { + pub fn new_empty( + on_disk: OnDisk, + identity: Option, + ) -> Self + where + OnDisk: Deref + Send + 'static, + { + let on_disk = Box::new(on_disk); + + OwningDirstateMap::new(on_disk, |bytes| { + let mut empty = DirstateMap::empty(bytes); + empty.identity = identity; + empty + }) + } + + pub fn new_v1( + on_disk: OnDisk, + identity: Option, + ) -> Result<(Self, DirstateParents), DirstateError> + where + OnDisk: Deref + Send + 'static, + { + let on_disk = Box::new(on_disk); + let mut parents = DirstateParents::NULL; + + Ok(( + OwningDirstateMap::try_new(on_disk, |bytes| { + DirstateMap::new_v1(bytes, identity).map(|(dmap, p)| { + parents = p.unwrap_or(DirstateParents::NULL); + dmap + }) + })?, + parents, + )) + } + + pub fn new_v2( + on_disk: OnDisk, + data_size: usize, + metadata: &[u8], + uuid: Vec, + identity: Option, + ) -> Result + where + OnDisk: Deref + Send + 'static, + { + let on_disk = Box::new(on_disk); + + OwningDirstateMap::try_new(on_disk, |bytes| { + DirstateMap::new_v2(bytes, data_size, metadata, uuid, identity) + }) + } + + pub fn with_dmap_mut( + &mut self, + f: impl FnOnce(&mut DirstateMap) -> R, + ) -> R { + self.with_dependent_mut(|_owner, dmap| f(dmap)) + } + + pub fn get_map(&self) -> &DirstateMap { + self.borrow_dependent() + } + + pub fn on_disk(&self) -> &[u8] { + self.borrow_owner() + } + + pub fn old_uuid(&self) -> Option<&[u8]> { + self.get_map().old_uuid.as_deref() + } + + pub fn old_identity(&self) -> Option { + self.get_map().identity + } + + pub fn old_data_size(&self) -> usize { + self.get_map().old_data_size + } +} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate/path_with_basename.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/dirstate/path_with_basename.rs Mon Nov 04 11:00:58 2024 +0100 @@ -0,0 +1,187 @@ +use crate::utils::hg_path::HgPath; +use 
std::borrow::{Borrow, Cow}; + +/// Wraps `HgPath` or `HgPathBuf` to make it behave "as" its last path +/// component, a.k.a. its base name (as in Python’s `os.path.basename`), but +/// also allow recovering the full path. +/// +/// "Behaving as" means that equality and comparison consider only the base +/// name, and `std::borrow::Borrow` is implemented to return only the base +/// name. This allows using the base name as a map key while still being able +/// to recover the full path, in a single memory allocation. +#[derive(Debug)] +pub struct WithBasename { + full_path: T, + + /// The position after the last slash separator in `full_path`, or `0` + /// if there is no slash. + base_name_start: usize, +} + +impl WithBasename { + pub fn full_path(&self) -> &T { + &self.full_path + } +} + +fn find_base_name_start(full_path: &HgPath) -> usize { + if let Some(last_slash_position) = + full_path.as_bytes().iter().rposition(|&byte| byte == b'/') + { + last_slash_position + 1 + } else { + 0 + } +} + +impl> WithBasename { + pub fn new(full_path: T) -> Self { + Self { + base_name_start: find_base_name_start(full_path.as_ref()), + full_path, + } + } + + pub fn from_raw_parts(full_path: T, base_name_start: usize) -> Self { + debug_assert_eq!( + base_name_start, + find_base_name_start(full_path.as_ref()) + ); + Self { + base_name_start, + full_path, + } + } + + pub fn base_name(&self) -> &HgPath { + HgPath::new( + &self.full_path.as_ref().as_bytes()[self.base_name_start..], + ) + } + + pub fn base_name_start(&self) -> usize { + self.base_name_start + } +} + +impl> Borrow for WithBasename { + fn borrow(&self) -> &HgPath { + self.base_name() + } +} + +impl> std::hash::Hash for WithBasename { + fn hash(&self, hasher: &mut H) { + self.base_name().hash(hasher) + } +} + +impl + PartialEq> PartialEq for WithBasename { + fn eq(&self, other: &Self) -> bool { + self.base_name() == other.base_name() + } +} + +impl + Eq> Eq for WithBasename {} + +impl + PartialOrd> PartialOrd for 
WithBasename { + fn partial_cmp(&self, other: &Self) -> Option { + self.base_name().partial_cmp(other.base_name()) + } +} + +impl + Ord> Ord for WithBasename { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.base_name().cmp(other.base_name()) + } +} + +impl<'a> WithBasename<&'a HgPath> { + pub fn to_cow_borrowed(self) -> WithBasename> { + WithBasename { + full_path: Cow::Borrowed(self.full_path), + base_name_start: self.base_name_start, + } + } + + pub fn to_cow_owned<'b>(self) -> WithBasename> { + WithBasename { + full_path: Cow::Owned(self.full_path.to_owned()), + base_name_start: self.base_name_start, + } + } +} + +impl<'a> WithBasename<&'a HgPath> { + /// Returns an iterator of `WithBasename<&HgPath>` for the ancestor + /// directory paths of the given `path`, as well as `path` itself. + /// + /// For example, the full paths of inclusive ancestors of "a/b/c" are "a", + /// "a/b", and "a/b/c" in that order. + pub fn inclusive_ancestors_of( + path: &'a HgPath, + ) -> impl Iterator> { + let mut slash_positions = + path.as_bytes().iter().enumerate().filter_map(|(i, &byte)| { + if byte == b'/' { + Some(i) + } else { + None + } + }); + let mut opt_next_component_start = Some(0); + std::iter::from_fn(move || { + opt_next_component_start.take().map(|next_component_start| { + if let Some(slash_pos) = slash_positions.next() { + opt_next_component_start = Some(slash_pos + 1); + Self { + full_path: HgPath::new(&path.as_bytes()[..slash_pos]), + base_name_start: next_component_start, + } + } else { + // Not setting `opt_next_component_start` here: there will + // be no iteration after this one because `.take()` set it + // to `None`. 
+ Self { + full_path: path, + base_name_start: next_component_start, + } + } + }) + }) + } +} + +#[test] +fn test() { + let a = WithBasename::new(HgPath::new("a").to_owned()); + assert_eq!(&**a.full_path(), HgPath::new(b"a")); + assert_eq!(a.base_name(), HgPath::new(b"a")); + + let cba = WithBasename::new(HgPath::new("c/b/a").to_owned()); + assert_eq!(&**cba.full_path(), HgPath::new(b"c/b/a")); + assert_eq!(cba.base_name(), HgPath::new(b"a")); + + assert_eq!(a, cba); + let borrowed: &HgPath = cba.borrow(); + assert_eq!(borrowed, HgPath::new("a")); +} + +#[test] +fn test_inclusive_ancestors() { + let mut iter = WithBasename::inclusive_ancestors_of(HgPath::new("a/bb/c")); + + let next = iter.next().unwrap(); + assert_eq!(*next.full_path(), HgPath::new("a")); + assert_eq!(next.base_name(), HgPath::new("a")); + + let next = iter.next().unwrap(); + assert_eq!(*next.full_path(), HgPath::new("a/bb")); + assert_eq!(next.base_name(), HgPath::new("bb")); + + let next = iter.next().unwrap(); + assert_eq!(*next.full_path(), HgPath::new("a/bb/c")); + assert_eq!(next.base_name(), HgPath::new("c")); + + assert!(iter.next().is_none()); +} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate/status.rs --- a/rust/hg-core/src/dirstate/status.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-core/src/dirstate/status.rs Mon Nov 04 11:00:58 2024 +0100 @@ -9,12 +9,31 @@ //! It is currently missing a lot of functionality compared to the Python one //! and will only be triggered in narrow cases. 
+use crate::dirstate::dirstate_map::BorrowedPath; +use crate::dirstate::dirstate_map::ChildNodesRef; +use crate::dirstate::dirstate_map::DirstateMap; +use crate::dirstate::dirstate_map::DirstateVersion; +use crate::dirstate::dirstate_map::NodeRef; use crate::dirstate::entry::TruncatedTimestamp; -use crate::dirstate_tree::on_disk::DirstateV2ParseError; -use crate::{ - utils::hg_path::{HgPath, HgPathError}, - PatternError, -}; +use crate::dirstate::on_disk::DirstateV2ParseError; +use crate::matchers::get_ignore_function; +use crate::matchers::{Matcher, VisitChildrenSet}; +use crate::utils::files::filesystem_now; +use crate::utils::files::get_bytes_from_os_string; +use crate::utils::files::get_bytes_from_path; +use crate::utils::files::get_path_from_bytes; +use crate::utils::hg_path::hg_path_to_path_buf; +use crate::utils::hg_path::HgPath; +use crate::PatternFileWarning; +use crate::{utils::hg_path::HgPathError, PatternError}; +use once_cell::sync::OnceCell; +use rayon::prelude::*; +use sha1::{Digest, Sha1}; +use std::io; +use std::os::unix::prelude::FileTypeExt; +use std::path::Path; +use std::path::PathBuf; +use std::sync::Mutex; use std::{borrow::Cow, fmt}; @@ -147,3 +166,1006 @@ } } } + +/// Returns the status of the working directory compared to its parent +/// changeset. +/// +/// This algorithm is based on traversing the filesystem tree (`fs` in function +/// and variable names) and dirstate tree at the same time. The core of this +/// traversal is the recursive `traverse_fs_directory_and_dirstate` function +/// and its use of `itertools::merge_join_by`. When reaching a path that only +/// exists in one of the two trees, depending on information requested by +/// `options` we may need to traverse the remaining subtree. 
+#[logging_timer::time("trace")] +pub fn status<'dirstate>( + dmap: &'dirstate mut DirstateMap, + matcher: &(dyn Matcher + Sync), + root_dir: PathBuf, + ignore_files: Vec, + options: StatusOptions, +) -> Result<(DirstateStatus<'dirstate>, Vec), StatusError> +{ + // Also cap for a Python caller of this function, but don't complain if + // the global threadpool has already been set since this code path is also + // being used by `rhg`, which calls this early. + let _ = crate::utils::cap_default_rayon_threads(); + + let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) = + if options.list_ignored || options.list_unknown { + let (ignore_fn, warnings, changed) = match dmap.dirstate_version { + DirstateVersion::V1 => { + let (ignore_fn, warnings) = get_ignore_function( + ignore_files, + &root_dir, + &mut |_source, _pattern_bytes| {}, + )?; + (ignore_fn, warnings, None) + } + DirstateVersion::V2 => { + let mut hasher = Sha1::new(); + let (ignore_fn, warnings) = get_ignore_function( + ignore_files, + &root_dir, + &mut |source, pattern_bytes| { + // If inside the repo, use the relative version to + // make it deterministic inside tests. + // The performance hit should be negligible. 
+ let source = source + .strip_prefix(&root_dir) + .unwrap_or(source); + let source = get_bytes_from_path(source); + + let mut subhasher = Sha1::new(); + subhasher.update(pattern_bytes); + let patterns_hash = subhasher.finalize(); + + hasher.update(source); + hasher.update(b" "); + hasher.update(patterns_hash); + hasher.update(b"\n"); + }, + )?; + let new_hash = *hasher.finalize().as_ref(); + let changed = new_hash != dmap.ignore_patterns_hash; + dmap.ignore_patterns_hash = new_hash; + (ignore_fn, warnings, Some(changed)) + } + }; + (ignore_fn, warnings, changed) + } else { + (Box::new(|&_| true), vec![], None) + }; + + let filesystem_time_at_status_start = + filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from); + + // If the repository is under the current directory, prefer using a + // relative path, so the kernel needs to traverse fewer directory in every + // call to `read_dir` or `symlink_metadata`. + // This is effective in the common case where the current directory is the + // repository root. + + // TODO: Better yet would be to use libc functions like `openat` and + // `fstatat` to remove such repeated traversals entirely, but the standard + // library does not provide APIs based on those. + // Maybe with a crate like https://crates.io/crates/openat instead? 
+ let root_dir = if let Some(relative) = std::env::current_dir() + .ok() + .and_then(|cwd| root_dir.strip_prefix(cwd).ok()) + { + relative + } else { + &root_dir + }; + + let outcome = DirstateStatus { + filesystem_time_at_status_start, + ..Default::default() + }; + let common = StatusCommon { + dmap, + options, + matcher, + ignore_fn, + outcome: Mutex::new(outcome), + ignore_patterns_have_changed: patterns_changed, + new_cacheable_directories: Default::default(), + outdated_cached_directories: Default::default(), + filesystem_time_at_status_start, + }; + let is_at_repo_root = true; + let hg_path = &BorrowedPath::OnDisk(HgPath::new("")); + let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path); + let root_cached_mtime = None; + // If the path we have for the repository root is a symlink, do follow it. + // (As opposed to symlinks within the working directory which are not + // followed, using `std::fs::symlink_metadata`.) + common.traverse_fs_directory_and_dirstate( + &has_ignored_ancestor, + dmap.root.as_ref(), + hg_path, + &DirEntry { + hg_path: Cow::Borrowed(HgPath::new(b"")), + fs_path: Cow::Borrowed(root_dir), + symlink_metadata: None, + file_type: FakeFileType::Directory, + }, + root_cached_mtime, + is_at_repo_root, + )?; + if let Some(file_set) = common.matcher.file_set() { + for file in file_set { + if !file.is_empty() && !dmap.has_node(file)? 
{
+ let path = hg_path_to_path_buf(file)?;
+ if let io::Result::Err(error) =
+ root_dir.join(path).symlink_metadata()
+ {
+ common.io_error(error, file)
+ }
+ }
+ }
+ }
+ let mut outcome = common.outcome.into_inner().unwrap();
+ let new_cacheable = common.new_cacheable_directories.into_inner().unwrap();
+ let outdated = common.outdated_cached_directories.into_inner().unwrap();
+
+ outcome.dirty = common.ignore_patterns_have_changed == Some(true)
+ || !outdated.is_empty()
+ || (!new_cacheable.is_empty()
+ && dmap.dirstate_version == DirstateVersion::V2);
+
+ // Remove outdated mtimes before adding new mtimes, in case a given
+ // directory is both
+ for path in &outdated {
+ dmap.clear_cached_mtime(path)?;
+ }
+ for (path, mtime) in &new_cacheable {
+ dmap.set_cached_mtime(path, *mtime)?;
+ }
+
+ Ok((outcome, warnings))
+}
+
+/// Bag of random things needed by various parts of the algorithm. Reduces the
+/// number of parameters passed to functions.
+struct StatusCommon<'a, 'tree, 'on_disk: 'tree> {
+ dmap: &'tree DirstateMap<'on_disk>,
+ options: StatusOptions,
+ matcher: &'a (dyn Matcher + Sync),
+ ignore_fn: IgnoreFnType<'a>,
+ outcome: Mutex>,
+ /// New timestamps of directories to be used for caching their readdirs
+ new_cacheable_directories:
+ Mutex, TruncatedTimestamp)>>,
+ /// Used to invalidate the readdir cache of directories
+ outdated_cached_directories: Mutex>>,
+
+ /// Whether ignore files like `.hgignore` have changed since the previous
+ /// time a `status()` call wrote their hash to the dirstate. `None` means
+ /// we don’t know as this run doesn’t list either ignored or unknown files
+ /// and therefore isn’t reading `.hgignore`.
+ ignore_patterns_have_changed: Option,
+
+ /// The current time at the start of the `status()` algorithm, as measured
+ /// and possibly truncated by the filesystem.
+ filesystem_time_at_status_start: Option, +} + +enum Outcome { + Modified, + Added, + Removed, + Deleted, + Clean, + Ignored, + Unknown, + Unsure, +} + +/// Lazy computation of whether a given path has a hgignored +/// ancestor. +struct HasIgnoredAncestor<'a> { + /// `path` and `parent` constitute the inputs to the computation, + /// `cache` stores the outcome. + path: &'a HgPath, + parent: Option<&'a HasIgnoredAncestor<'a>>, + cache: OnceCell, +} + +impl<'a> HasIgnoredAncestor<'a> { + fn create( + parent: Option<&'a HasIgnoredAncestor<'a>>, + path: &'a HgPath, + ) -> HasIgnoredAncestor<'a> { + Self { + path, + parent, + cache: OnceCell::new(), + } + } + + fn force(&self, ignore_fn: &IgnoreFnType<'_>) -> bool { + match self.parent { + None => false, + Some(parent) => { + *(self.cache.get_or_init(|| { + parent.force(ignore_fn) || ignore_fn(self.path) + })) + } + } + } +} + +impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { + fn push_outcome( + &self, + which: Outcome, + dirstate_node: &NodeRef<'tree, 'on_disk>, + ) -> Result<(), DirstateV2ParseError> { + let path = dirstate_node + .full_path_borrowed(self.dmap.on_disk)? + .detach_from_tree(); + let copy_source = if self.options.list_copies { + dirstate_node + .copy_source_borrowed(self.dmap.on_disk)? 
+ .map(|source| source.detach_from_tree()) + } else { + None + }; + self.push_outcome_common(which, path, copy_source); + Ok(()) + } + + fn push_outcome_without_copy_source( + &self, + which: Outcome, + path: &BorrowedPath<'_, 'on_disk>, + ) { + self.push_outcome_common(which, path.detach_from_tree(), None) + } + + fn push_outcome_common( + &self, + which: Outcome, + path: HgPathCow<'on_disk>, + copy_source: Option>, + ) { + let mut outcome = self.outcome.lock().unwrap(); + let vec = match which { + Outcome::Modified => &mut outcome.modified, + Outcome::Added => &mut outcome.added, + Outcome::Removed => &mut outcome.removed, + Outcome::Deleted => &mut outcome.deleted, + Outcome::Clean => &mut outcome.clean, + Outcome::Ignored => &mut outcome.ignored, + Outcome::Unknown => &mut outcome.unknown, + Outcome::Unsure => &mut outcome.unsure, + }; + vec.push(StatusPath { path, copy_source }); + } + + fn read_dir( + &self, + hg_path: &HgPath, + fs_path: &Path, + is_at_repo_root: bool, + ) -> Result, ()> { + DirEntry::read_dir(fs_path, is_at_repo_root) + .map_err(|error| self.io_error(error, hg_path)) + } + + fn io_error(&self, error: std::io::Error, hg_path: &HgPath) { + let errno = error.raw_os_error().expect("expected real OS error"); + self.outcome + .lock() + .unwrap() + .bad + .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) + } + + fn check_for_outdated_directory_cache( + &self, + dirstate_node: &NodeRef<'tree, 'on_disk>, + ) -> Result { + if self.ignore_patterns_have_changed == Some(true) + && dirstate_node.cached_directory_mtime()?.is_some() + { + self.outdated_cached_directories.lock().unwrap().push( + dirstate_node + .full_path_borrowed(self.dmap.on_disk)? + .detach_from_tree(), + ); + return Ok(true); + } + Ok(false) + } + + /// If this returns true, we can get accurate results by only using + /// `symlink_metadata` for child nodes that exist in the dirstate and don’t + /// need to call `read_dir`. 
+ fn can_skip_fs_readdir( + &self, + directory_entry: &DirEntry, + cached_directory_mtime: Option, + ) -> bool { + if !self.options.list_unknown && !self.options.list_ignored { + // All states that we care about listing have corresponding + // dirstate entries. + // This happens for example with `hg status -mard`. + return true; + } + if !self.options.list_ignored + && self.ignore_patterns_have_changed == Some(false) + { + if let Some(cached_mtime) = cached_directory_mtime { + // The dirstate contains a cached mtime for this directory, set + // by a previous run of the `status` algorithm which found this + // directory eligible for `read_dir` caching. + if let Ok(meta) = directory_entry.symlink_metadata() { + if cached_mtime + .likely_equal_to_mtime_of(&meta) + .unwrap_or(false) + { + // The mtime of that directory has not changed + // since then, which means that the results of + // `read_dir` should also be unchanged. + return true; + } + } + } + } + false + } + + fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool { + match set { + VisitChildrenSet::This | VisitChildrenSet::Recursive => true, + VisitChildrenSet::Empty => false, + VisitChildrenSet::Set(children_to_visit) => { + children_to_visit.contains(basename) + } + } + } + + /// Returns whether all child entries of the filesystem directory have a + /// corresponding dirstate node or are ignored. 
+ fn traverse_fs_directory_and_dirstate<'ancestor>( + &self, + has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>, + dirstate_nodes: ChildNodesRef<'tree, 'on_disk>, + directory_hg_path: &BorrowedPath<'tree, 'on_disk>, + directory_entry: &DirEntry, + cached_directory_mtime: Option, + is_at_repo_root: bool, + ) -> Result { + let children_set = self.matcher.visit_children_set(directory_hg_path); + if let VisitChildrenSet::Empty = children_set { + return Ok(false); + } + if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) { + dirstate_nodes + .par_iter() + .map(|dirstate_node| { + let fs_path = &directory_entry.fs_path; + let basename = + dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(); + let fs_path = fs_path.join(get_path_from_bytes(basename)); + if !Self::should_visit( + &children_set, + HgPath::new(basename), + ) { + return Ok(()); + } + match std::fs::symlink_metadata(&fs_path) { + Ok(fs_metadata) => { + let file_type = fs_metadata.file_type().into(); + let entry = DirEntry { + hg_path: Cow::Borrowed( + dirstate_node + .full_path(self.dmap.on_disk)?, + ), + fs_path: Cow::Borrowed(&fs_path), + symlink_metadata: Some(fs_metadata), + file_type, + }; + self.traverse_fs_and_dirstate( + &entry, + dirstate_node, + has_ignored_ancestor, + ) + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + self.traverse_dirstate_only(dirstate_node) + } + Err(error) => { + let hg_path = + dirstate_node.full_path(self.dmap.on_disk)?; + self.io_error(error, hg_path); + Ok(()) + } + } + }) + .collect::>()?; + + // We don’t know, so conservatively say this isn’t the case + let children_all_have_dirstate_node_or_are_ignored = false; + + return Ok(children_all_have_dirstate_node_or_are_ignored); + } + + let readdir_succeeded; + let mut fs_entries = if let Ok(entries) = self.read_dir( + directory_hg_path, + &directory_entry.fs_path, + is_at_repo_root, + ) { + readdir_succeeded = true; + entries + } else { + // Treat an unreadable directory 
(typically because of insufficient + // permissions) like an empty directory. `self.read_dir` has + // already called `self.io_error` so a warning will be emitted. + // We still need to remember that there was an error so that we + // know not to cache this result. + readdir_succeeded = false; + Vec::new() + }; + + // `merge_join_by` requires both its input iterators to be sorted: + + let dirstate_nodes = dirstate_nodes.sorted(); + // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: + // https://github.com/rust-lang/rust/issues/34162 + fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path)); + + // Propagate here any error that would happen inside the comparison + // callback below + for dirstate_node in &dirstate_nodes { + dirstate_node.base_name(self.dmap.on_disk)?; + } + itertools::merge_join_by( + dirstate_nodes, + &fs_entries, + |dirstate_node, fs_entry| { + // This `unwrap` never panics because we already propagated + // those errors above + dirstate_node + .base_name(self.dmap.on_disk) + .unwrap() + .cmp(&fs_entry.hg_path) + }, + ) + .par_bridge() + .map(|pair| { + use itertools::EitherOrBoth::*; + let basename = match &pair { + Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new( + dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), + ), + Right(fs_entry) => &fs_entry.hg_path, + }; + if !Self::should_visit(&children_set, basename) { + return Ok(false); + } + let has_dirstate_node_or_is_ignored = match pair { + Both(dirstate_node, fs_entry) => { + self.traverse_fs_and_dirstate( + fs_entry, + dirstate_node, + has_ignored_ancestor, + )?; + true + } + Left(dirstate_node) => { + self.traverse_dirstate_only(dirstate_node)?; + true + } + Right(fs_entry) => self.traverse_fs_only( + has_ignored_ancestor.force(&self.ignore_fn), + directory_hg_path, + fs_entry, + ), + }; + Ok(has_dirstate_node_or_is_ignored) + }) + .try_reduce(|| true, |a, b| Ok(a && b)) + .map(|res| res && readdir_succeeded) + } + + fn 
traverse_fs_and_dirstate<'ancestor>( + &self, + fs_entry: &DirEntry, + dirstate_node: NodeRef<'tree, 'on_disk>, + has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>, + ) -> Result<(), DirstateV2ParseError> { + let outdated_dircache = + self.check_for_outdated_directory_cache(&dirstate_node)?; + let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?; + let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink(); + if !file_or_symlink { + // If we previously had a file here, it was removed (with + // `hg rm` or similar) or deleted before it could be + // replaced by a directory or something else. + self.mark_removed_or_deleted_if_file(&dirstate_node)?; + } + if let Some(bad_type) = fs_entry.is_bad() { + if self.matcher.exact_match(hg_path) { + let path = dirstate_node.full_path(self.dmap.on_disk)?; + self.outcome.lock().unwrap().bad.push(( + path.to_owned().into(), + BadMatch::BadType(bad_type), + )) + } + } + if fs_entry.is_dir() { + if self.options.collect_traversed_dirs { + self.outcome + .lock() + .unwrap() + .traversed + .push(hg_path.detach_from_tree()) + } + let is_ignored = HasIgnoredAncestor::create( + Some(has_ignored_ancestor), + hg_path, + ); + let is_at_repo_root = false; + let children_all_have_dirstate_node_or_are_ignored = self + .traverse_fs_directory_and_dirstate( + &is_ignored, + dirstate_node.children(self.dmap.on_disk)?, + hg_path, + fs_entry, + dirstate_node.cached_directory_mtime()?, + is_at_repo_root, + )?; + self.maybe_save_directory_mtime( + children_all_have_dirstate_node_or_are_ignored, + fs_entry, + dirstate_node, + outdated_dircache, + )? + } else { + if file_or_symlink && self.matcher.matches(hg_path) { + if let Some(entry) = dirstate_node.entry()? 
{ + if !entry.any_tracked() { + // Forward-compat if we start tracking unknown/ignored + // files for caching reasons + self.mark_unknown_or_ignored( + has_ignored_ancestor.force(&self.ignore_fn), + hg_path, + ); + } + if entry.added() { + self.push_outcome(Outcome::Added, &dirstate_node)?; + } else if entry.removed() { + self.push_outcome(Outcome::Removed, &dirstate_node)?; + } else if entry.modified() { + self.push_outcome(Outcome::Modified, &dirstate_node)?; + } else { + self.handle_normal_file(&dirstate_node, fs_entry)?; + } + } else { + // `node.entry.is_none()` indicates a "directory" + // node, but the filesystem has a file + self.mark_unknown_or_ignored( + has_ignored_ancestor.force(&self.ignore_fn), + hg_path, + ); + } + } + + for child_node in dirstate_node.children(self.dmap.on_disk)?.iter() + { + self.traverse_dirstate_only(child_node)? + } + } + Ok(()) + } + + /// Save directory mtime if applicable. + /// + /// `outdated_directory_cache` is `true` if we've just invalidated the + /// cache for this directory in `check_for_outdated_directory_cache`, + /// which forces the update. + fn maybe_save_directory_mtime( + &self, + children_all_have_dirstate_node_or_are_ignored: bool, + directory_entry: &DirEntry, + dirstate_node: NodeRef<'tree, 'on_disk>, + outdated_directory_cache: bool, + ) -> Result<(), DirstateV2ParseError> { + if !children_all_have_dirstate_node_or_are_ignored { + return Ok(()); + } + // All filesystem directory entries from `read_dir` have a + // corresponding node in the dirstate, so we can reconstitute the + // names of those entries without calling `read_dir` again. 
+
+ // TODO: use let-else here and below when available:
+ // https://github.com/rust-lang/rust/issues/87335
+ let status_start = if let Some(status_start) =
+ &self.filesystem_time_at_status_start
+ {
+ status_start
+ } else {
+ return Ok(());
+ };
+
+ // Although the Rust standard library’s `SystemTime` type
+ // has nanosecond precision, the times reported for a
+ // directory’s (or file’s) modified time may have lower
+ // resolution based on the filesystem (for example ext3
+ // only stores integer seconds), kernel (see
+ // https://stackoverflow.com/a/14393315/1162888), etc.
+ let metadata = match directory_entry.symlink_metadata() {
+ Ok(meta) => meta,
+ Err(_) => return Ok(()),
+ };
+
+ let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of(
+ &metadata,
+ status_start,
+ ) {
+ Ok(Some(directory_mtime)) => directory_mtime,
+ Ok(None) => {
+ // The directory was modified too recently,
+ // don’t cache its `read_dir` results.
+ //
+ // 1. A change to this directory (direct child was added or
+ // removed) causes its mtime to be set (possibly truncated)
+ // to `directory_mtime`
+ // 2. This `status` algorithm calls `read_dir`
+ // 3. Another change is made to the same directory so
+ // that calling `read_dir` again would give different
+ // results, but soon enough after 1. that the mtime stays
+ // the same
+ //
+ // On a system where the time resolution is poor, this
+ // scenario is not unlikely if all three steps are caused
+ // by the same script.
+ return Ok(());
+ }
+ Err(_) => {
+ // OS/libc does not support mtime?
+ return Ok(());
+ }
+ };
+ // We’ve observed (through `status_start`) that time has
+ // “progressed” since `directory_mtime`, so any further
+ // change to this directory is extremely likely to cause a
+ // different mtime.
+ //
+ // Having the same mtime again is not entirely impossible
+ // since the system clock is not monotonic.
It could jump + // backward to some point before `directory_mtime`, then a + // directory change could potentially happen during exactly + // the wrong tick. + // + // We deem this scenario (unlike the previous one) to be + // unlikely enough in practice. + + let is_up_to_date = if let Some(cached) = + dirstate_node.cached_directory_mtime()? + { + !outdated_directory_cache && cached.likely_equal(directory_mtime) + } else { + false + }; + if !is_up_to_date { + let hg_path = dirstate_node + .full_path_borrowed(self.dmap.on_disk)? + .detach_from_tree(); + self.new_cacheable_directories + .lock() + .unwrap() + .push((hg_path, directory_mtime)) + } + Ok(()) + } + + /// A file that is clean in the dirstate was found in the filesystem + fn handle_normal_file( + &self, + dirstate_node: &NodeRef<'tree, 'on_disk>, + fs_entry: &DirEntry, + ) -> Result<(), DirstateV2ParseError> { + // Keep the low 31 bits + fn truncate_u64(value: u64) -> i32 { + (value & 0x7FFF_FFFF) as i32 + } + + let fs_metadata = match fs_entry.symlink_metadata() { + Ok(meta) => meta, + Err(_) => return Ok(()), + }; + + let entry = dirstate_node + .entry()? + .expect("handle_normal_file called with entry-less node"); + let mode_changed = + || self.options.check_exec && entry.mode_changed(&fs_metadata); + let size = entry.size(); + let size_changed = size != truncate_u64(fs_metadata.len()); + if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() { + // issue6456: Size returned may be longer due to encryption + // on EXT-4 fscrypt. TODO maybe only do it on EXT4? + self.push_outcome(Outcome::Unsure, dirstate_node)? + } else if dirstate_node.has_copy_source() + || entry.is_from_other_parent() + || (size >= 0 && (size_changed || mode_changed())) + { + self.push_outcome(Outcome::Modified, dirstate_node)? 
+ } else { + let mtime_looks_clean = if let Some(dirstate_mtime) = + entry.truncated_mtime() + { + let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata) + .expect("OS/libc does not support mtime?"); + // There might be a change in the future if for example the + // internal clock become off while process run, but this is a + // case where the issues the user would face + // would be a lot worse and there is nothing we + // can really do. + fs_mtime.likely_equal(dirstate_mtime) + } else { + // No mtime in the dirstate entry + false + }; + if !mtime_looks_clean { + self.push_outcome(Outcome::Unsure, dirstate_node)? + } else if self.options.list_clean { + self.push_outcome(Outcome::Clean, dirstate_node)? + } + } + Ok(()) + } + + /// A node in the dirstate tree has no corresponding filesystem entry + fn traverse_dirstate_only( + &self, + dirstate_node: NodeRef<'tree, 'on_disk>, + ) -> Result<(), DirstateV2ParseError> { + self.check_for_outdated_directory_cache(&dirstate_node)?; + self.mark_removed_or_deleted_if_file(&dirstate_node)?; + dirstate_node + .children(self.dmap.on_disk)? + .par_iter() + .map(|child_node| self.traverse_dirstate_only(child_node)) + .collect() + } + + /// A node in the dirstate tree has no corresponding *file* on the + /// filesystem + /// + /// Does nothing on a "directory" node + fn mark_removed_or_deleted_if_file( + &self, + dirstate_node: &NodeRef<'tree, 'on_disk>, + ) -> Result<(), DirstateV2ParseError> { + if let Some(entry) = dirstate_node.entry()? { + if !entry.any_tracked() { + // Future-compat for when we start storing ignored and unknown + // files for caching reasons + return Ok(()); + } + let path = dirstate_node.full_path(self.dmap.on_disk)?; + if self.matcher.matches(path) { + if entry.removed() { + self.push_outcome(Outcome::Removed, dirstate_node)? + } else { + self.push_outcome(Outcome::Deleted, dirstate_node)? 
+ } + } + } + Ok(()) + } + + /// Something in the filesystem has no corresponding dirstate node + /// + /// Returns whether that path is ignored + fn traverse_fs_only( + &self, + has_ignored_ancestor: bool, + directory_hg_path: &HgPath, + fs_entry: &DirEntry, + ) -> bool { + let hg_path = directory_hg_path.join(&fs_entry.hg_path); + let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink(); + if fs_entry.is_dir() { + let is_ignored = + has_ignored_ancestor || (self.ignore_fn)(&hg_path); + let traverse_children = if is_ignored { + // Descendants of an ignored directory are all ignored + self.options.list_ignored + } else { + // Descendants of an unknown directory may be either unknown or + // ignored + self.options.list_unknown || self.options.list_ignored + }; + if traverse_children { + let is_at_repo_root = false; + if let Ok(children_fs_entries) = + self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root) + { + children_fs_entries.par_iter().for_each(|child_fs_entry| { + self.traverse_fs_only( + is_ignored, + &hg_path, + child_fs_entry, + ); + }) + } + if self.options.collect_traversed_dirs { + self.outcome.lock().unwrap().traversed.push(hg_path.into()) + } + } + is_ignored + } else if file_or_symlink { + if self.matcher.matches(&hg_path) { + self.mark_unknown_or_ignored( + has_ignored_ancestor, + &BorrowedPath::InMemory(&hg_path), + ) + } else { + // We haven’t computed whether this path is ignored. It + // might not be, and a future run of status might have a + // different matcher that matches it. So treat it as not + // ignored. That is, inhibit readdir caching of the parent + // directory. + false + } + } else { + // This is neither a directory, a plain file, or a symlink. + // Treat it like an ignored file. 
+ true + } + } + + /// Returns whether that path is ignored + fn mark_unknown_or_ignored( + &self, + has_ignored_ancestor: bool, + hg_path: &BorrowedPath<'_, 'on_disk>, + ) -> bool { + let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); + if is_ignored { + if self.options.list_ignored { + self.push_outcome_without_copy_source( + Outcome::Ignored, + hg_path, + ) + } + } else if self.options.list_unknown { + self.push_outcome_without_copy_source(Outcome::Unknown, hg_path) + } + is_ignored + } +} + +/// Since [`std::fs::FileType`] cannot be built directly, we emulate what we +/// care about. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +enum FakeFileType { + File, + Directory, + Symlink, + BadType(BadType), +} + +impl From for FakeFileType { + fn from(f: std::fs::FileType) -> Self { + if f.is_dir() { + Self::Directory + } else if f.is_file() { + Self::File + } else if f.is_symlink() { + Self::Symlink + } else if f.is_fifo() { + Self::BadType(BadType::FIFO) + } else if f.is_block_device() { + Self::BadType(BadType::BlockDevice) + } else if f.is_char_device() { + Self::BadType(BadType::CharacterDevice) + } else if f.is_socket() { + Self::BadType(BadType::Socket) + } else { + Self::BadType(BadType::Unknown) + } + } +} + +struct DirEntry<'a> { + /// Path as stored in the dirstate, or just the filename for optimization. + hg_path: HgPathCow<'a>, + /// Filesystem path + fs_path: Cow<'a, Path>, + /// Lazily computed + symlink_metadata: Option, + /// Already computed for ergonomics. + file_type: FakeFileType, +} + +impl<'a> DirEntry<'a> { + /// Returns **unsorted** entries in the given directory, with name, + /// metadata and file type. + /// + /// If a `.hg` sub-directory is encountered: + /// + /// * At the repository root, ignore that sub-directory + /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty + /// list instead. 
+ fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result> { + // `read_dir` returns a "not found" error for the empty path + let at_cwd = path == Path::new(""); + let read_dir_path = if at_cwd { Path::new(".") } else { path }; + let mut results = Vec::new(); + for entry in read_dir_path.read_dir()? { + let entry = entry?; + let file_type = match entry.file_type() { + Ok(v) => v, + Err(e) => { + // race with file deletion? + if e.kind() == std::io::ErrorKind::NotFound { + continue; + } else { + return Err(e); + } + } + }; + let file_name = entry.file_name(); + // FIXME don't do this when cached + if file_name == ".hg" { + if is_at_repo_root { + // Skip the repo’s own .hg (might be a symlink) + continue; + } else if file_type.is_dir() { + // A .hg sub-directory at another location means a subrepo, + // skip it entirely. + return Ok(Vec::new()); + } + } + let full_path = if at_cwd { + file_name.clone().into() + } else { + entry.path() + }; + let filename = + Cow::Owned(get_bytes_from_os_string(file_name).into()); + let file_type = FakeFileType::from(file_type); + results.push(DirEntry { + hg_path: filename, + fs_path: Cow::Owned(full_path.to_path_buf()), + symlink_metadata: None, + file_type, + }) + } + Ok(results) + } + + fn symlink_metadata(&self) -> Result { + match &self.symlink_metadata { + Some(meta) => Ok(meta.clone()), + None => std::fs::symlink_metadata(&self.fs_path), + } + } + + fn is_dir(&self) -> bool { + self.file_type == FakeFileType::Directory + } + + fn is_file(&self) -> bool { + self.file_type == FakeFileType::File + } + + fn is_symlink(&self) -> bool { + self.file_type == FakeFileType::Symlink + } + + fn is_bad(&self) -> Option { + match self.file_type { + FakeFileType::BadType(ty) => Some(ty), + _ => None, + } + } +} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate_tree.rs --- a/rust/hg-core/src/dirstate_tree.rs Mon Nov 04 10:38:17 2024 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -pub mod dirstate_map; 
-pub mod on_disk; -pub mod owning; -pub mod path_with_basename; -pub mod status; diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate_tree/dirstate_map.rs --- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Mon Nov 04 10:38:17 2024 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2123 +0,0 @@ -use bytes_cast::BytesCast; -use std::borrow::Cow; -use std::fs::Metadata; -use std::os::unix::fs::MetadataExt; -use std::path::PathBuf; - -use super::on_disk; -use super::on_disk::DirstateV2ParseError; -use super::owning::OwningDirstateMap; -use super::path_with_basename::WithBasename; -use crate::dirstate::parsers::pack_entry; -use crate::dirstate::parsers::packed_entry_size; -use crate::dirstate::parsers::parse_dirstate_entries; -use crate::dirstate::CopyMapIter; -use crate::dirstate::DirstateV2Data; -use crate::dirstate::ParentFileData; -use crate::dirstate::StateMapIter; -use crate::dirstate::TruncatedTimestamp; -use crate::matchers::Matcher; -use crate::utils::filter_map_results; -use crate::utils::hg_path::{HgPath, HgPathBuf}; -use crate::DirstateEntry; -use crate::DirstateError; -use crate::DirstateMapError; -use crate::DirstateParents; -use crate::DirstateStatus; -use crate::FastHashbrownMap as FastHashMap; -use crate::PatternFileWarning; -use crate::StatusError; -use crate::StatusOptions; - -/// Append to an existing data file if the amount of unreachable data (not used -/// anymore) is less than this fraction of the total amount of existing data. 
-const ACCEPTABLE_UNREACHABLE_BYTES_RATIO: f32 = 0.5; - -#[derive(Debug, PartialEq, Eq)] -/// Version of the on-disk format -pub enum DirstateVersion { - V1, - V2, -} - -#[derive(Debug, PartialEq, Eq)] -pub enum DirstateMapWriteMode { - Auto, - ForceNewDataFile, - ForceAppend, -} - -/// Used to detect out-of-process changes in the dirstate -#[derive(Debug, Copy, Clone)] -pub struct DirstateIdentity { - pub mode: u32, - pub dev: u64, - pub ino: u64, - pub nlink: u64, - pub uid: u32, - pub gid: u32, - pub size: u64, - pub mtime: i64, - pub mtime_nsec: i64, - pub ctime: i64, - pub ctime_nsec: i64, -} - -impl From for DirstateIdentity { - fn from(value: Metadata) -> Self { - Self { - mode: value.mode(), - dev: value.dev(), - ino: value.ino(), - nlink: value.nlink(), - uid: value.uid(), - gid: value.gid(), - size: value.size(), - mtime: value.mtime(), - mtime_nsec: value.mtime_nsec(), - ctime: value.ctime(), - ctime_nsec: value.ctime_nsec(), - } - } -} - -impl PartialEq for DirstateIdentity { - fn eq(&self, other: &Self) -> bool { - // Some platforms return 0 when they have no support for nanos. - // This shouldn't be a problem in practice because of how highly - // unlikely it is that we actually get exactly 0 nanos, and worst - // case scenario, we don't write out the dirstate in a non-wlocked - // situation like status. 
- let mtime_nanos_equal = (self.mtime_nsec == 0 - || other.mtime_nsec == 0) - || self.mtime_nsec == other.mtime_nsec; - let ctime_nanos_equal = (self.ctime_nsec == 0 - || other.ctime_nsec == 0) - || self.ctime_nsec == other.ctime_nsec; - - self.mode == other.mode - && self.dev == other.dev - && self.ino == other.ino - && self.nlink == other.nlink - && self.uid == other.uid - && self.gid == other.gid - && self.size == other.size - && self.mtime == other.mtime - && mtime_nanos_equal - && self.ctime == other.ctime - && ctime_nanos_equal - } -} - -#[derive(Debug)] -pub struct DirstateMap<'on_disk> { - /// Contents of the `.hg/dirstate` file - pub(super) on_disk: &'on_disk [u8], - - pub(super) root: ChildNodes<'on_disk>, - - /// Number of nodes anywhere in the tree that have `.entry.is_some()`. - pub(super) nodes_with_entry_count: u32, - - /// Number of nodes anywhere in the tree that have - /// `.copy_source.is_some()`. - pub(super) nodes_with_copy_source_count: u32, - - /// See on_disk::Header - pub(super) ignore_patterns_hash: on_disk::IgnorePatternsHash, - - /// How many bytes of `on_disk` are not used anymore - pub(super) unreachable_bytes: u32, - - /// Size of the data used to first load this `DirstateMap`. Used in case - /// we need to write some new metadata, but no new data on disk, - /// as well as to detect writes that have happened in another process - /// since first read. - pub(super) old_data_size: usize, - - /// UUID used when first loading this `DirstateMap`. Used to check if - /// the UUID has been changed by another process since first read. - /// Can be `None` if using dirstate v1 or if it's a brand new dirstate. - pub(super) old_uuid: Option>, - - /// Identity of the dirstate file (for dirstate-v1) or the docket file - /// (v2). Used to detect if the file has changed from another process. - /// Since it's always written atomically, we can compare the inode to - /// check the file identity. 
- /// - /// TODO On non-Unix systems, something like hashing is a possibility? - pub(super) identity: Option, - - pub(super) dirstate_version: DirstateVersion, - - /// Controlled by config option `devel.dirstate.v2.data_update_mode` - pub(super) write_mode: DirstateMapWriteMode, - - /// Controlled by config option `format.use-dirstate-tracked-hint` - pub(super) use_tracked_hint: bool, -} - -/// Using a plain `HgPathBuf` of the full path from the repository root as a -/// map key would also work: all paths in a given map have the same parent -/// path, so comparing full paths gives the same result as comparing base -/// names. However `HashMap` would waste time always re-hashing the same -/// string prefix. -pub(super) type NodeKey<'on_disk> = WithBasename>; - -/// Similar to `&'tree Cow<'on_disk, HgPath>`, but can also be returned -/// for on-disk nodes that don’t actually have a `Cow` to borrow. -#[derive(Debug)] -pub(super) enum BorrowedPath<'tree, 'on_disk> { - InMemory(&'tree HgPathBuf), - OnDisk(&'on_disk HgPath), -} - -#[derive(Debug)] -pub(super) enum ChildNodes<'on_disk> { - InMemory(FastHashMap, Node<'on_disk>>), - OnDisk(&'on_disk [on_disk::Node]), -} - -#[derive(Debug)] -pub(super) enum ChildNodesRef<'tree, 'on_disk> { - InMemory(&'tree FastHashMap, Node<'on_disk>>), - OnDisk(&'on_disk [on_disk::Node]), -} - -#[derive(Debug)] -pub(super) enum NodeRef<'tree, 'on_disk> { - InMemory(&'tree NodeKey<'on_disk>, &'tree Node<'on_disk>), - OnDisk(&'on_disk on_disk::Node), -} - -impl<'tree, 'on_disk> BorrowedPath<'tree, 'on_disk> { - pub fn detach_from_tree(&self) -> Cow<'on_disk, HgPath> { - match *self { - BorrowedPath::InMemory(in_memory) => Cow::Owned(in_memory.clone()), - BorrowedPath::OnDisk(on_disk) => Cow::Borrowed(on_disk), - } - } -} - -impl<'tree, 'on_disk> std::ops::Deref for BorrowedPath<'tree, 'on_disk> { - type Target = HgPath; - - fn deref(&self) -> &HgPath { - match *self { - BorrowedPath::InMemory(in_memory) => in_memory, - 
BorrowedPath::OnDisk(on_disk) => on_disk, - } - } -} - -impl Default for ChildNodes<'_> { - fn default() -> Self { - ChildNodes::InMemory(Default::default()) - } -} - -impl<'on_disk> ChildNodes<'on_disk> { - pub(super) fn as_ref<'tree>( - &'tree self, - ) -> ChildNodesRef<'tree, 'on_disk> { - match self { - ChildNodes::InMemory(nodes) => ChildNodesRef::InMemory(nodes), - ChildNodes::OnDisk(nodes) => ChildNodesRef::OnDisk(nodes), - } - } - - pub(super) fn is_empty(&self) -> bool { - match self { - ChildNodes::InMemory(nodes) => nodes.is_empty(), - ChildNodes::OnDisk(nodes) => nodes.is_empty(), - } - } - - fn make_mut( - &mut self, - on_disk: &'on_disk [u8], - unreachable_bytes: &mut u32, - ) -> Result< - &mut FastHashMap, Node<'on_disk>>, - DirstateV2ParseError, - > { - match self { - ChildNodes::InMemory(nodes) => Ok(nodes), - ChildNodes::OnDisk(nodes) => { - *unreachable_bytes += - std::mem::size_of_val::<[on_disk::Node]>(*nodes) as u32; - let nodes = nodes - .iter() - .map(|node| { - Ok(( - node.path(on_disk)?, - node.to_in_memory_node(on_disk)?, - )) - }) - .collect::>()?; - *self = ChildNodes::InMemory(nodes); - match self { - ChildNodes::InMemory(nodes) => Ok(nodes), - ChildNodes::OnDisk(_) => unreachable!(), - } - } - } - } -} - -impl<'tree, 'on_disk> ChildNodesRef<'tree, 'on_disk> { - pub(super) fn get( - &self, - base_name: &HgPath, - on_disk: &'on_disk [u8], - ) -> Result>, DirstateV2ParseError> { - match self { - ChildNodesRef::InMemory(nodes) => Ok(nodes - .get_key_value(base_name) - .map(|(k, v)| NodeRef::InMemory(k, v))), - ChildNodesRef::OnDisk(nodes) => { - let mut parse_result = Ok(()); - let search_result = nodes.binary_search_by(|node| { - match node.base_name(on_disk) { - Ok(node_base_name) => node_base_name.cmp(base_name), - Err(e) => { - parse_result = Err(e); - // Dummy comparison result, `search_result` won’t - // be used since `parse_result` is an error - std::cmp::Ordering::Equal - } - } - }); - parse_result.map(|()| { - 
search_result.ok().map(|i| NodeRef::OnDisk(&nodes[i])) - }) - } - } - } - - /// Iterate in undefined order - pub(super) fn iter( - &self, - ) -> impl Iterator> { - match self { - ChildNodesRef::InMemory(nodes) => itertools::Either::Left( - nodes.iter().map(|(k, v)| NodeRef::InMemory(k, v)), - ), - ChildNodesRef::OnDisk(nodes) => { - itertools::Either::Right(nodes.iter().map(NodeRef::OnDisk)) - } - } - } - - /// Iterate in parallel in undefined order - pub(super) fn par_iter( - &self, - ) -> impl rayon::iter::ParallelIterator> - { - use rayon::prelude::*; - match self { - ChildNodesRef::InMemory(nodes) => rayon::iter::Either::Left( - nodes.par_iter().map(|(k, v)| NodeRef::InMemory(k, v)), - ), - ChildNodesRef::OnDisk(nodes) => rayon::iter::Either::Right( - nodes.par_iter().map(NodeRef::OnDisk), - ), - } - } - - pub(super) fn sorted(&self) -> Vec> { - match self { - ChildNodesRef::InMemory(nodes) => { - let mut vec: Vec<_> = nodes - .iter() - .map(|(k, v)| NodeRef::InMemory(k, v)) - .collect(); - fn sort_key<'a>(node: &'a NodeRef) -> &'a HgPath { - match node { - NodeRef::InMemory(path, _node) => path.base_name(), - NodeRef::OnDisk(_) => unreachable!(), - } - } - // `sort_unstable_by_key` doesn’t allow keys borrowing from the - // value: https://github.com/rust-lang/rust/issues/34162 - vec.sort_unstable_by(|a, b| sort_key(a).cmp(sort_key(b))); - vec - } - ChildNodesRef::OnDisk(nodes) => { - // Nodes on disk are already sorted - nodes.iter().map(NodeRef::OnDisk).collect() - } - } - } -} - -impl<'tree, 'on_disk> NodeRef<'tree, 'on_disk> { - pub(super) fn full_path( - &self, - on_disk: &'on_disk [u8], - ) -> Result<&'tree HgPath, DirstateV2ParseError> { - match self { - NodeRef::InMemory(path, _node) => Ok(path.full_path()), - NodeRef::OnDisk(node) => node.full_path(on_disk), - } - } - - /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk, - /// HgPath>` detached from `'tree` - pub(super) fn full_path_borrowed( - &self, - on_disk: &'on_disk [u8], - ) 
-> Result, DirstateV2ParseError> { - match self { - NodeRef::InMemory(path, _node) => match path.full_path() { - Cow::Borrowed(on_disk) => Ok(BorrowedPath::OnDisk(on_disk)), - Cow::Owned(in_memory) => Ok(BorrowedPath::InMemory(in_memory)), - }, - NodeRef::OnDisk(node) => { - Ok(BorrowedPath::OnDisk(node.full_path(on_disk)?)) - } - } - } - - pub(super) fn base_name( - &self, - on_disk: &'on_disk [u8], - ) -> Result<&'tree HgPath, DirstateV2ParseError> { - match self { - NodeRef::InMemory(path, _node) => Ok(path.base_name()), - NodeRef::OnDisk(node) => node.base_name(on_disk), - } - } - - pub(super) fn children( - &self, - on_disk: &'on_disk [u8], - ) -> Result, DirstateV2ParseError> { - match self { - NodeRef::InMemory(_path, node) => Ok(node.children.as_ref()), - NodeRef::OnDisk(node) => { - Ok(ChildNodesRef::OnDisk(node.children(on_disk)?)) - } - } - } - - pub(super) fn has_copy_source(&self) -> bool { - match self { - NodeRef::InMemory(_path, node) => node.copy_source.is_some(), - NodeRef::OnDisk(node) => node.has_copy_source(), - } - } - - pub(super) fn copy_source( - &self, - on_disk: &'on_disk [u8], - ) -> Result, DirstateV2ParseError> { - match self { - NodeRef::InMemory(_path, node) => Ok(node.copy_source.as_deref()), - NodeRef::OnDisk(node) => node.copy_source(on_disk), - } - } - /// Returns a `BorrowedPath`, which can be turned into a `Cow<'on_disk, - /// HgPath>` detached from `'tree` - pub(super) fn copy_source_borrowed( - &self, - on_disk: &'on_disk [u8], - ) -> Result>, DirstateV2ParseError> - { - Ok(match self { - NodeRef::InMemory(_path, node) => { - node.copy_source.as_ref().map(|source| match source { - Cow::Borrowed(on_disk) => BorrowedPath::OnDisk(on_disk), - Cow::Owned(in_memory) => BorrowedPath::InMemory(in_memory), - }) - } - NodeRef::OnDisk(node) => { - node.copy_source(on_disk)?.map(BorrowedPath::OnDisk) - } - }) - } - - pub(super) fn entry( - &self, - ) -> Result, DirstateV2ParseError> { - match self { - NodeRef::InMemory(_path, node) => { 
- Ok(node.data.as_entry().copied()) - } - NodeRef::OnDisk(node) => node.entry(), - } - } - - pub(super) fn cached_directory_mtime( - &self, - ) -> Result, DirstateV2ParseError> { - match self { - NodeRef::InMemory(_path, node) => Ok(match node.data { - NodeData::CachedDirectory { mtime } => Some(mtime), - _ => None, - }), - NodeRef::OnDisk(node) => node.cached_directory_mtime(), - } - } - - pub(super) fn descendants_with_entry_count(&self) -> u32 { - match self { - NodeRef::InMemory(_path, node) => { - node.descendants_with_entry_count - } - NodeRef::OnDisk(node) => node.descendants_with_entry_count.get(), - } - } - - pub(super) fn tracked_descendants_count(&self) -> u32 { - match self { - NodeRef::InMemory(_path, node) => node.tracked_descendants_count, - NodeRef::OnDisk(node) => node.tracked_descendants_count.get(), - } - } -} - -/// Represents a file or a directory -#[derive(Default, Debug)] -pub(super) struct Node<'on_disk> { - pub(super) data: NodeData, - - pub(super) copy_source: Option>, - - pub(super) children: ChildNodes<'on_disk>, - - /// How many (non-inclusive) descendants of this node have an entry. - pub(super) descendants_with_entry_count: u32, - - /// How many (non-inclusive) descendants of this node have an entry whose - /// state is "tracked". 
- pub(super) tracked_descendants_count: u32, -} - -#[derive(Debug, Default)] -pub(super) enum NodeData { - Entry(DirstateEntry), - CachedDirectory { - mtime: TruncatedTimestamp, - }, - #[default] - None, -} - -impl NodeData { - fn has_entry(&self) -> bool { - matches!(self, NodeData::Entry(_)) - } - - fn as_entry(&self) -> Option<&DirstateEntry> { - match self { - NodeData::Entry(entry) => Some(entry), - _ => None, - } - } - - fn as_entry_mut(&mut self) -> Option<&mut DirstateEntry> { - match self { - NodeData::Entry(entry) => Some(entry), - _ => None, - } - } -} - -impl<'on_disk> DirstateMap<'on_disk> { - pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { - Self { - on_disk, - root: ChildNodes::default(), - nodes_with_entry_count: 0, - nodes_with_copy_source_count: 0, - ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN], - unreachable_bytes: 0, - old_data_size: 0, - old_uuid: None, - identity: None, - dirstate_version: DirstateVersion::V1, - write_mode: DirstateMapWriteMode::Auto, - use_tracked_hint: false, - } - } - - #[logging_timer::time("trace")] - pub fn new_v2( - on_disk: &'on_disk [u8], - data_size: usize, - metadata: &[u8], - uuid: Vec, - identity: Option, - ) -> Result { - if let Some(data) = on_disk.get(..data_size) { - Ok(on_disk::read(data, metadata, uuid, identity)?) 
- } else { - Err(DirstateV2ParseError::new("not enough bytes on disk").into()) - } - } - - #[logging_timer::time("trace")] - pub fn new_v1( - on_disk: &'on_disk [u8], - identity: Option, - ) -> Result<(Self, Option), DirstateError> { - let mut map = Self::empty(on_disk); - map.identity = identity; - - if map.on_disk.is_empty() { - return Ok((map, None)); - } - - let parents = parse_dirstate_entries( - map.on_disk, - |path, entry, copy_source| { - let tracked = entry.tracked(); - let node = Self::get_or_insert_node_inner( - map.on_disk, - &mut map.unreachable_bytes, - &mut map.root, - path, - WithBasename::to_cow_borrowed, - |ancestor| { - if tracked { - ancestor.tracked_descendants_count += 1 - } - ancestor.descendants_with_entry_count += 1 - }, - )?; - assert!( - !node.data.has_entry(), - "duplicate dirstate entry in read" - ); - assert!( - node.copy_source.is_none(), - "duplicate dirstate entry in read" - ); - node.data = NodeData::Entry(*entry); - node.copy_source = copy_source.map(Cow::Borrowed); - map.nodes_with_entry_count += 1; - if copy_source.is_some() { - map.nodes_with_copy_source_count += 1 - } - Ok(()) - }, - )?; - let parents = Some(*parents); - - Ok((map, parents)) - } - - /// Assuming dirstate-v2 format, returns whether the next write should - /// append to the existing data file that contains `self.on_disk` (true), - /// or create a new data file from scratch (false). 
- pub(super) fn write_should_append(&self) -> bool { - match self.write_mode { - DirstateMapWriteMode::ForceAppend => true, - DirstateMapWriteMode::ForceNewDataFile => false, - DirstateMapWriteMode::Auto => { - let ratio = - self.unreachable_bytes as f32 / self.on_disk.len() as f32; - ratio < ACCEPTABLE_UNREACHABLE_BYTES_RATIO - } - } - } - - fn get_node<'tree>( - &'tree self, - path: &HgPath, - ) -> Result>, DirstateV2ParseError> { - let mut children = self.root.as_ref(); - let mut components = path.components(); - let mut component = - components.next().expect("expected at least one components"); - loop { - if let Some(child) = children.get(component, self.on_disk)? { - if let Some(next_component) = components.next() { - component = next_component; - children = child.children(self.on_disk)?; - } else { - return Ok(Some(child)); - } - } else { - return Ok(None); - } - } - } - - pub fn has_node( - &self, - path: &HgPath, - ) -> Result { - let node = self.get_node(path)?; - Ok(node.is_some()) - } - - /// Returns a mutable reference to the node at `path` if it exists - /// - /// `each_ancestor` is a callback that is called for each ancestor node - /// when descending the tree. It is used to keep the different counters - /// of the `DirstateMap` up-to-date. - fn get_node_mut<'tree>( - &'tree mut self, - path: &HgPath, - each_ancestor: impl FnMut(&mut Node), - ) -> Result>, DirstateV2ParseError> { - Self::get_node_mut_inner( - self.on_disk, - &mut self.unreachable_bytes, - &mut self.root, - path, - each_ancestor, - ) - } - - /// Lower-level version of `get_node_mut`. - /// - /// This takes `root` instead of `&mut self` so that callers can mutate - /// other fields while the returned borrow is still valid. - /// - /// `each_ancestor` is a callback that is called for each ancestor node - /// when descending the tree. It is used to keep the different counters - /// of the `DirstateMap` up-to-date. 
- fn get_node_mut_inner<'tree>( - on_disk: &'on_disk [u8], - unreachable_bytes: &mut u32, - root: &'tree mut ChildNodes<'on_disk>, - path: &HgPath, - mut each_ancestor: impl FnMut(&mut Node), - ) -> Result>, DirstateV2ParseError> { - let mut children = root; - let mut components = path.components(); - let mut component = - components.next().expect("expected at least one components"); - loop { - if let Some(child) = children - .make_mut(on_disk, unreachable_bytes)? - .get_mut(component) - { - if let Some(next_component) = components.next() { - each_ancestor(child); - component = next_component; - children = &mut child.children; - } else { - return Ok(Some(child)); - } - } else { - return Ok(None); - } - } - } - - /// Get a mutable reference to the node at `path`, creating it if it does - /// not exist. - /// - /// `each_ancestor` is a callback that is called for each ancestor node - /// when descending the tree. It is used to keep the different counters - /// of the `DirstateMap` up-to-date. - fn get_or_insert_node<'tree, 'path>( - &'tree mut self, - path: &'path HgPath, - each_ancestor: impl FnMut(&mut Node), - ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { - Self::get_or_insert_node_inner( - self.on_disk, - &mut self.unreachable_bytes, - &mut self.root, - path, - WithBasename::to_cow_owned, - each_ancestor, - ) - } - - /// Lower-level version of `get_or_insert_node_inner`, which is used when - /// parsing disk data to remove allocations for new nodes. 
- fn get_or_insert_node_inner<'tree, 'path>( - on_disk: &'on_disk [u8], - unreachable_bytes: &mut u32, - root: &'tree mut ChildNodes<'on_disk>, - path: &'path HgPath, - to_cow: impl Fn( - WithBasename<&'path HgPath>, - ) -> WithBasename>, - mut each_ancestor: impl FnMut(&mut Node), - ) -> Result<&'tree mut Node<'on_disk>, DirstateV2ParseError> { - let mut child_nodes = root; - let mut inclusive_ancestor_paths = - WithBasename::inclusive_ancestors_of(path); - let mut ancestor_path = inclusive_ancestor_paths - .next() - .expect("expected at least one inclusive ancestor"); - loop { - let (_, child_node) = child_nodes - .make_mut(on_disk, unreachable_bytes)? - .raw_entry_mut() - .from_key(ancestor_path.base_name()) - .or_insert_with(|| (to_cow(ancestor_path), Node::default())); - if let Some(next) = inclusive_ancestor_paths.next() { - each_ancestor(child_node); - ancestor_path = next; - child_nodes = &mut child_node.children; - } else { - return Ok(child_node); - } - } - } - - #[allow(clippy::too_many_arguments)] - fn reset_state( - &mut self, - filename: &HgPath, - old_entry_opt: Option, - wc_tracked: bool, - p1_tracked: bool, - p2_info: bool, - has_meaningful_mtime: bool, - parent_file_data_opt: Option, - ) -> Result<(), DirstateError> { - let (had_entry, was_tracked) = match old_entry_opt { - Some(old_entry) => (true, old_entry.tracked()), - None => (false, false), - }; - let node = self.get_or_insert_node(filename, |ancestor| { - if !had_entry { - ancestor.descendants_with_entry_count += 1; - } - if was_tracked { - if !wc_tracked { - ancestor.tracked_descendants_count = ancestor - .tracked_descendants_count - .checked_sub(1) - .expect("tracked count to be >= 0"); - } - } else if wc_tracked { - ancestor.tracked_descendants_count += 1; - } - })?; - - let v2_data = if let Some(parent_file_data) = parent_file_data_opt { - DirstateV2Data { - wc_tracked, - p1_tracked, - p2_info, - mode_size: parent_file_data.mode_size, - mtime: if has_meaningful_mtime { - 
parent_file_data.mtime - } else { - None - }, - ..Default::default() - } - } else { - DirstateV2Data { - wc_tracked, - p1_tracked, - p2_info, - ..Default::default() - } - }; - node.data = NodeData::Entry(DirstateEntry::from_v2_data(v2_data)); - if !had_entry { - self.nodes_with_entry_count += 1; - } - Ok(()) - } - - fn set_tracked( - &mut self, - filename: &HgPath, - old_entry_opt: Option, - ) -> Result { - let was_tracked = old_entry_opt.map_or(false, |e| e.tracked()); - let had_entry = old_entry_opt.is_some(); - let tracked_count_increment = u32::from(!was_tracked); - let mut new = false; - - let node = self.get_or_insert_node(filename, |ancestor| { - if !had_entry { - ancestor.descendants_with_entry_count += 1; - } - - ancestor.tracked_descendants_count += tracked_count_increment; - })?; - if let Some(old_entry) = old_entry_opt { - let mut e = old_entry; - if e.tracked() { - // XXX - // This is probably overkill for more case, but we need this to - // fully replace the `normallookup` call with `set_tracked` - // one. Consider smoothing this in the future. - e.set_possibly_dirty(); - } else { - new = true; - e.set_tracked(); - } - node.data = NodeData::Entry(e) - } else { - node.data = NodeData::Entry(DirstateEntry::new_tracked()); - self.nodes_with_entry_count += 1; - new = true; - }; - Ok(new) - } - - /// Set a node as untracked in the dirstate. - /// - /// It is the responsibility of the caller to remove the copy source and/or - /// the entry itself if appropriate. - /// - /// # Panics - /// - /// Panics if the node does not exist. - fn set_untracked( - &mut self, - filename: &HgPath, - old_entry: DirstateEntry, - ) -> Result<(), DirstateV2ParseError> { - let node = self - .get_node_mut(filename, |ancestor| { - ancestor.tracked_descendants_count = ancestor - .tracked_descendants_count - .checked_sub(1) - .expect("tracked_descendants_count should be >= 0"); - })? 
- .expect("node should exist"); - let mut new_entry = old_entry; - new_entry.set_untracked(); - node.data = NodeData::Entry(new_entry); - Ok(()) - } - - /// Set a node as clean in the dirstate. - /// - /// It is the responsibility of the caller to remove the copy source. - /// - /// # Panics - /// - /// Panics if the node does not exist. - fn set_clean( - &mut self, - filename: &HgPath, - old_entry: DirstateEntry, - mode: u32, - size: u32, - mtime: TruncatedTimestamp, - ) -> Result<(), DirstateError> { - let node = self - .get_node_mut(filename, |ancestor| { - if !old_entry.tracked() { - ancestor.tracked_descendants_count += 1; - } - })? - .expect("node should exist"); - let mut new_entry = old_entry; - new_entry.set_clean(mode, size, mtime); - node.data = NodeData::Entry(new_entry); - Ok(()) - } - - /// Set a node as possibly dirty in the dirstate. - /// - /// # Panics - /// - /// Panics if the node does not exist. - fn set_possibly_dirty( - &mut self, - filename: &HgPath, - ) -> Result<(), DirstateError> { - let node = self - .get_node_mut(filename, |_ancestor| {})? - .expect("node should exist"); - let entry = node.data.as_entry_mut().expect("entry should exist"); - entry.set_possibly_dirty(); - node.data = NodeData::Entry(*entry); - Ok(()) - } - - /// Clears the cached mtime for the (potential) folder at `path`. - pub(super) fn clear_cached_mtime( - &mut self, - path: &HgPath, - ) -> Result<(), DirstateV2ParseError> { - let node = match self.get_node_mut(path, |_ancestor| {})? { - Some(node) => node, - None => return Ok(()), - }; - if let NodeData::CachedDirectory { .. } = &node.data { - node.data = NodeData::None - } - Ok(()) - } - - /// Sets the cached mtime for the (potential) folder at `path`. - pub(super) fn set_cached_mtime( - &mut self, - path: &HgPath, - mtime: TruncatedTimestamp, - ) -> Result<(), DirstateV2ParseError> { - let node = match self.get_node_mut(path, |_ancestor| {})? 
{ - Some(node) => node, - None => return Ok(()), - }; - match &node.data { - NodeData::Entry(_) => {} // Don’t overwrite an entry - NodeData::CachedDirectory { .. } | NodeData::None => { - node.data = NodeData::CachedDirectory { mtime } - } - } - Ok(()) - } - - fn iter_nodes<'tree>( - &'tree self, - ) -> impl Iterator< - Item = Result, DirstateV2ParseError>, - > + 'tree { - // Depth first tree traversal. - // - // If we could afford internal iteration and recursion, - // this would look like: - // - // ``` - // fn traverse_children( - // children: &ChildNodes, - // each: &mut impl FnMut(&Node), - // ) { - // for child in children.values() { - // traverse_children(&child.children, each); - // each(child); - // } - // } - // ``` - // - // However we want an external iterator and therefore can’t use the - // call stack. Use an explicit stack instead: - let mut stack = Vec::new(); - let mut iter = self.root.as_ref().iter(); - std::iter::from_fn(move || { - while let Some(child_node) = iter.next() { - let children = match child_node.children(self.on_disk) { - Ok(children) => children, - Err(error) => return Some(Err(error)), - }; - // Pseudo-recursion - let new_iter = children.iter(); - let old_iter = std::mem::replace(&mut iter, new_iter); - stack.push((child_node, old_iter)); - } - // Found the end of a `children.iter()` iterator. 
- if let Some((child_node, next_iter)) = stack.pop() { - // "Return" from pseudo-recursion by restoring state from the - // explicit stack - iter = next_iter; - - Some(Ok(child_node)) - } else { - // Reached the bottom of the stack, we’re done - None - } - }) - } - - fn count_dropped_path(unreachable_bytes: &mut u32, path: Cow) { - if let Cow::Borrowed(path) = path { - *unreachable_bytes += path.len() as u32 - } - } - - pub(crate) fn set_write_mode(&mut self, write_mode: DirstateMapWriteMode) { - self.write_mode = write_mode; - } - - pub(crate) fn set_tracked_hint(&mut self, tracked_hint: bool) { - self.use_tracked_hint = tracked_hint; - } -} - -/// Sets the parameters for resetting a dirstate entry -pub struct DirstateEntryReset<'a> { - /// Which entry are we resetting - pub filename: &'a HgPath, - /// Whether the entry is tracked in the working copy - pub wc_tracked: bool, - /// Whether the entry is tracked in p1 - pub p1_tracked: bool, - /// Whether the entry has merge information - pub p2_info: bool, - /// Whether the entry's mtime should be trusted - pub has_meaningful_mtime: bool, - /// Information from the parent file data (from the manifest) - pub parent_file_data_opt: Option, - /// Set this to `true` if you are *certain* that there is no old entry for - /// this filename. Yield better performance in cases where we do a lot - /// of additions to the dirstate. 
- pub from_empty: bool, -} - -type DebugDirstateTuple<'a> = (&'a HgPath, (u8, i32, i32, i32)); - -impl OwningDirstateMap { - pub fn clear(&mut self) { - self.with_dmap_mut(|map| { - map.root = Default::default(); - map.nodes_with_entry_count = 0; - map.nodes_with_copy_source_count = 0; - map.unreachable_bytes = map.on_disk.len() as u32; - }); - } - - pub fn set_tracked( - &mut self, - filename: &HgPath, - ) -> Result { - let old_entry_opt = self.get(filename)?; - self.with_dmap_mut(|map| map.set_tracked(filename, old_entry_opt)) - } - - pub fn set_untracked( - &mut self, - filename: &HgPath, - ) -> Result { - let old_entry_opt = self.get(filename)?; - match old_entry_opt { - None => Ok(false), - Some(old_entry) => { - if !old_entry.tracked() { - // `DirstateMap::set_untracked` is not a noop if - // already not tracked as it will decrement the - // tracked counters while going down. - return Ok(true); - } - if old_entry.added() { - // Untracking an "added" entry will just result in a - // worthless entry (and other parts of the code will - // complain about it), just drop it entirely. - self.drop_entry_and_copy_source(filename)?; - return Ok(true); - } - if !old_entry.p2_info() { - self.copy_map_remove(filename)?; - } - - self.with_dmap_mut(|map| { - map.set_untracked(filename, old_entry)?; - Ok(true) - }) - } - } - } - - pub fn set_clean( - &mut self, - filename: &HgPath, - mode: u32, - size: u32, - mtime: TruncatedTimestamp, - ) -> Result<(), DirstateError> { - let old_entry = match self.get(filename)? 
{ - None => { - return Err( - DirstateMapError::PathNotFound(filename.into()).into() - ) - } - Some(e) => e, - }; - self.copy_map_remove(filename)?; - self.with_dmap_mut(|map| { - map.set_clean(filename, old_entry, mode, size, mtime) - }) - } - - pub fn set_possibly_dirty( - &mut self, - filename: &HgPath, - ) -> Result<(), DirstateError> { - if self.get(filename)?.is_none() { - return Err(DirstateMapError::PathNotFound(filename.into()).into()); - } - self.with_dmap_mut(|map| map.set_possibly_dirty(filename)) - } - - pub fn reset_state( - &mut self, - reset: DirstateEntryReset, - ) -> Result<(), DirstateError> { - if !(reset.p1_tracked || reset.p2_info || reset.wc_tracked) { - self.drop_entry_and_copy_source(reset.filename)?; - return Ok(()); - } - if !reset.from_empty { - self.copy_map_remove(reset.filename)?; - } - - let old_entry_opt = if reset.from_empty { - None - } else { - self.get(reset.filename)? - }; - - self.with_dmap_mut(|map| { - map.reset_state( - reset.filename, - old_entry_opt, - reset.wc_tracked, - reset.p1_tracked, - reset.p2_info, - reset.has_meaningful_mtime, - reset.parent_file_data_opt, - ) - }) - } - - pub fn drop_entry_and_copy_source( - &mut self, - filename: &HgPath, - ) -> Result<(), DirstateError> { - let was_tracked = self.get(filename)?.map_or(false, |e| e.tracked()); - struct Dropped { - was_tracked: bool, - had_entry: bool, - had_copy_source: bool, - } - - /// If this returns `Ok(Some((dropped, removed)))`, then - /// - /// * `dropped` is about the leaf node that was at `filename` - /// * `removed` is whether this particular level of recursion just - /// removed a node in `nodes`. 
- fn recur<'on_disk>( - on_disk: &'on_disk [u8], - unreachable_bytes: &mut u32, - nodes: &mut ChildNodes<'on_disk>, - path: &HgPath, - ) -> Result, DirstateV2ParseError> { - let (first_path_component, rest_of_path) = - path.split_first_component(); - let nodes = nodes.make_mut(on_disk, unreachable_bytes)?; - let node = if let Some(node) = nodes.get_mut(first_path_component) - { - node - } else { - return Ok(None); - }; - let dropped; - if let Some(rest) = rest_of_path { - if let Some((d, removed)) = recur( - on_disk, - unreachable_bytes, - &mut node.children, - rest, - )? { - dropped = d; - if dropped.had_entry { - node.descendants_with_entry_count = node - .descendants_with_entry_count - .checked_sub(1) - .expect( - "descendants_with_entry_count should be >= 0", - ); - } - if dropped.was_tracked { - node.tracked_descendants_count = node - .tracked_descendants_count - .checked_sub(1) - .expect( - "tracked_descendants_count should be >= 0", - ); - } - - // Directory caches must be invalidated when removing a - // child node - if removed { - if let NodeData::CachedDirectory { .. } = &node.data { - node.data = NodeData::None - } - } - } else { - return Ok(None); - } - } else { - let entry = node.data.as_entry(); - let was_tracked = entry.map_or(false, |entry| entry.tracked()); - let had_entry = entry.is_some(); - if had_entry { - node.data = NodeData::None - } - let mut had_copy_source = false; - if let Some(source) = &node.copy_source { - DirstateMap::count_dropped_path( - unreachable_bytes, - Cow::Borrowed(source), - ); - had_copy_source = true; - node.copy_source = None - } - dropped = Dropped { - was_tracked, - had_entry, - had_copy_source, - }; - } - // After recursion, for both leaf (rest_of_path is None) nodes and - // parent nodes, remove a node if it just became empty. 
- let remove = !node.data.has_entry() - && node.copy_source.is_none() - && node.children.is_empty(); - if remove { - let (key, _) = - nodes.remove_entry(first_path_component).unwrap(); - DirstateMap::count_dropped_path( - unreachable_bytes, - Cow::Borrowed(key.full_path()), - ) - } - Ok(Some((dropped, remove))) - } - - self.with_dmap_mut(|map| { - if let Some((dropped, _removed)) = recur( - map.on_disk, - &mut map.unreachable_bytes, - &mut map.root, - filename, - )? { - if dropped.had_entry { - map.nodes_with_entry_count = map - .nodes_with_entry_count - .checked_sub(1) - .expect("nodes_with_entry_count should be >= 0"); - } - if dropped.had_copy_source { - map.nodes_with_copy_source_count = map - .nodes_with_copy_source_count - .checked_sub(1) - .expect("nodes_with_copy_source_count should be >= 0"); - } - } else { - debug_assert!(!was_tracked); - } - Ok(()) - }) - } - - pub fn has_tracked_dir( - &mut self, - directory: &HgPath, - ) -> Result { - self.with_dmap_mut(|map| { - if let Some(node) = map.get_node(directory)? { - // A node without a `DirstateEntry` was created to hold child - // nodes, and is therefore a directory. - let is_dir = node.entry()?.is_none(); - Ok(is_dir && node.tracked_descendants_count() > 0) - } else { - Ok(false) - } - }) - } - - pub fn has_dir( - &mut self, - directory: &HgPath, - ) -> Result { - self.with_dmap_mut(|map| { - if let Some(node) = map.get_node(directory)? { - // A node without a `DirstateEntry` was created to hold child - // nodes, and is therefore a directory. 
- let is_dir = node.entry()?.is_none(); - Ok(is_dir && node.descendants_with_entry_count() > 0) - } else { - Ok(false) - } - }) - } - - #[logging_timer::time("trace")] - pub fn pack_v1( - &self, - parents: DirstateParents, - ) -> Result, DirstateError> { - let map = self.get_map(); - // Optizimation (to be measured?): pre-compute size to avoid `Vec` - // reallocations - let mut size = parents.as_bytes().len(); - for node in map.iter_nodes() { - let node = node?; - if node.entry()?.is_some() { - size += packed_entry_size( - node.full_path(map.on_disk)?, - node.copy_source(map.on_disk)?, - ); - } - } - - let mut packed = Vec::with_capacity(size); - packed.extend(parents.as_bytes()); - - for node in map.iter_nodes() { - let node = node?; - if let Some(entry) = node.entry()? { - pack_entry( - node.full_path(map.on_disk)?, - &entry, - node.copy_source(map.on_disk)?, - &mut packed, - ); - } - } - Ok(packed) - } - - /// Returns new data and metadata together with whether that data should be - /// appended to the existing data file whose content is at - /// `map.on_disk` (true), instead of written to a new data file - /// (false), and the previous size of data on disk. - #[logging_timer::time("trace")] - pub fn pack_v2( - &self, - write_mode: DirstateMapWriteMode, - ) -> Result<(Vec, on_disk::TreeMetadata, bool, usize), DirstateError> - { - let map = self.get_map(); - on_disk::write(map, write_mode) - } - - /// `callback` allows the caller to process and do something with the - /// results of the status. This is needed to do so efficiently (i.e. - /// without cloning the `DirstateStatus` object with its paths) because - /// we need to borrow from `Self`. 
- pub fn with_status( - &mut self, - matcher: &(dyn Matcher + Sync), - root_dir: PathBuf, - ignore_files: Vec, - options: StatusOptions, - callback: impl for<'r> FnOnce( - Result<(DirstateStatus<'r>, Vec), StatusError>, - ) -> R, - ) -> R { - self.with_dmap_mut(|map| { - callback(super::status::status( - map, - matcher, - root_dir, - ignore_files, - options, - )) - }) - } - - pub fn copy_map_len(&self) -> usize { - let map = self.get_map(); - map.nodes_with_copy_source_count as usize - } - - pub fn copy_map_iter(&self) -> CopyMapIter<'_> { - let map = self.get_map(); - Box::new(filter_map_results(map.iter_nodes(), move |node| { - Ok(if let Some(source) = node.copy_source(map.on_disk)? { - Some((node.full_path(map.on_disk)?, source)) - } else { - None - }) - })) - } - - pub fn copy_map_contains_key( - &self, - key: &HgPath, - ) -> Result { - let map = self.get_map(); - Ok(if let Some(node) = map.get_node(key)? { - node.has_copy_source() - } else { - false - }) - } - - pub fn copy_map_get( - &self, - key: &HgPath, - ) -> Result, DirstateV2ParseError> { - let map = self.get_map(); - if let Some(node) = map.get_node(key)? { - if let Some(source) = node.copy_source(map.on_disk)? { - return Ok(Some(source)); - } - } - Ok(None) - } - - pub fn copy_map_remove( - &mut self, - key: &HgPath, - ) -> Result, DirstateV2ParseError> { - self.with_dmap_mut(|map| { - let count = &mut map.nodes_with_copy_source_count; - let unreachable_bytes = &mut map.unreachable_bytes; - Ok(DirstateMap::get_node_mut_inner( - map.on_disk, - unreachable_bytes, - &mut map.root, - key, - |_ancestor| {}, - )? 
- .and_then(|node| { - if let Some(source) = &node.copy_source { - *count = count - .checked_sub(1) - .expect("nodes_with_copy_source_count should be >= 0"); - DirstateMap::count_dropped_path( - unreachable_bytes, - Cow::Borrowed(source), - ); - } - node.copy_source.take().map(Cow::into_owned) - })) - }) - } - - pub fn copy_map_insert( - &mut self, - key: &HgPath, - value: &HgPath, - ) -> Result, DirstateV2ParseError> { - self.with_dmap_mut(|map| { - let node = map.get_or_insert_node(key, |_ancestor| {})?; - let had_copy_source = node.copy_source.is_none(); - let old = node - .copy_source - .replace(value.to_owned().into()) - .map(Cow::into_owned); - if had_copy_source { - map.nodes_with_copy_source_count += 1 - } - Ok(old) - }) - } - - pub fn len(&self) -> usize { - let map = self.get_map(); - map.nodes_with_entry_count as usize - } - - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - pub fn contains_key( - &self, - key: &HgPath, - ) -> Result { - Ok(self.get(key)?.is_some()) - } - - pub fn get( - &self, - key: &HgPath, - ) -> Result, DirstateV2ParseError> { - let map = self.get_map(); - Ok(if let Some(node) = map.get_node(key)? { - node.entry()? - } else { - None - }) - } - - pub fn iter(&self) -> StateMapIter<'_> { - let map = self.get_map(); - Box::new(filter_map_results(map.iter_nodes(), move |node| { - Ok(if let Some(entry) = node.entry()? { - Some((node.full_path(map.on_disk)?, entry)) - } else { - None - }) - })) - } - - pub fn iter_tracked_dirs( - &mut self, - ) -> Result< - Box< - dyn Iterator> - + Send - + '_, - >, - DirstateError, - > { - let map = self.get_map(); - let on_disk = map.on_disk; - Ok(Box::new(filter_map_results( - map.iter_nodes(), - move |node| { - Ok(if node.tracked_descendants_count() > 0 { - Some(node.full_path(on_disk)?) - } else { - None - }) - }, - ))) - } - - /// Only public because it needs to be exposed to the Python layer. - /// It is not the full `setparents` logic, only the parts that mutate the - /// entries. 
- pub fn setparents_fixup( - &mut self, - ) -> Result, DirstateV2ParseError> { - // XXX - // All the copying and re-querying is quite inefficient, but this is - // still a lot better than doing it from Python. - // - // The better solution is to develop a mechanism for `iter_mut`, - // which will be a lot more involved: we're dealing with a lazy, - // append-mostly, tree-like data structure. This will do for now. - let mut copies = vec![]; - let mut files_with_p2_info = vec![]; - for res in self.iter() { - let (path, entry) = res?; - if entry.p2_info() { - files_with_p2_info.push(path.to_owned()) - } - } - self.with_dmap_mut(|map| { - for path in files_with_p2_info.iter() { - let node = map.get_or_insert_node(path, |_| {})?; - let entry = - node.data.as_entry_mut().expect("entry should exist"); - entry.drop_merge_data(); - if let Some(source) = node.copy_source.take().as_deref() { - copies.push((path.to_owned(), source.to_owned())); - } - } - Ok(copies) - }) - } - - pub fn debug_iter( - &self, - all: bool, - ) -> Box< - dyn Iterator> - + Send - + '_, - > { - let map = self.get_map(); - Box::new(filter_map_results(map.iter_nodes(), move |node| { - let debug_tuple = if let Some(entry) = node.entry()? { - entry.debug_tuple() - } else if !all { - return Ok(None); - } else if let Some(mtime) = node.cached_directory_mtime()? { - (b' ', 0, -1, mtime.truncated_seconds() as i32) - } else { - (b' ', 0, -1, -1) - }; - Ok(Some((node.full_path(map.on_disk)?, debug_tuple))) - })) - } -} -#[cfg(test)] -mod tests { - use super::*; - - /// Shortcut to return tracked descendants of a path. - /// Panics if the path does not exist. - fn tracked_descendants(map: &OwningDirstateMap, path: &[u8]) -> u32 { - let path = dbg!(HgPath::new(path)); - let node = map.get_map().get_node(path); - node.unwrap().unwrap().tracked_descendants_count() - } - - /// Shortcut to return descendants with an entry. - /// Panics if the path does not exist. 
- fn descendants_with_an_entry(map: &OwningDirstateMap, path: &[u8]) -> u32 { - let path = dbg!(HgPath::new(path)); - let node = map.get_map().get_node(path); - node.unwrap().unwrap().descendants_with_entry_count() - } - - fn assert_does_not_exist(map: &OwningDirstateMap, path: &[u8]) { - let path = dbg!(HgPath::new(path)); - let node = map.get_map().get_node(path); - assert!(node.unwrap().is_none()); - } - - /// Shortcut for path creation in tests - fn p(b: &[u8]) -> &HgPath { - HgPath::new(b) - } - - /// Test the very simple case a single tracked file - #[test] - fn test_tracked_descendants_simple() -> Result<(), DirstateError> { - let mut map = OwningDirstateMap::new_empty(vec![], None); - assert_eq!(map.len(), 0); - - map.set_tracked(p(b"some/nested/path"))?; - - assert_eq!(map.len(), 1); - assert_eq!(tracked_descendants(&map, b"some"), 1); - assert_eq!(tracked_descendants(&map, b"some/nested"), 1); - assert_eq!(tracked_descendants(&map, b"some/nested/path"), 0); - - map.set_untracked(p(b"some/nested/path"))?; - assert_eq!(map.len(), 0); - assert!(map.get_map().get_node(p(b"some"))?.is_none()); - - Ok(()) - } - - /// Test the simple case of all tracked, but multiple files - #[test] - fn test_tracked_descendants_multiple() -> Result<(), DirstateError> { - let mut map = OwningDirstateMap::new_empty(vec![], None); - - map.set_tracked(p(b"some/nested/path"))?; - map.set_tracked(p(b"some/nested/file"))?; - // one layer without any files to test deletion cascade - map.set_tracked(p(b"some/other/nested/path"))?; - map.set_tracked(p(b"root_file"))?; - map.set_tracked(p(b"some/file"))?; - map.set_tracked(p(b"some/file2"))?; - map.set_tracked(p(b"some/file3"))?; - - assert_eq!(map.len(), 7); - assert_eq!(tracked_descendants(&map, b"some"), 6); - assert_eq!(tracked_descendants(&map, b"some/nested"), 2); - assert_eq!(tracked_descendants(&map, b"some/other"), 1); - assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); - assert_eq!(tracked_descendants(&map, 
b"some/nested/path"), 0); - - map.set_untracked(p(b"some/nested/path"))?; - assert_eq!(map.len(), 6); - assert_eq!(tracked_descendants(&map, b"some"), 5); - assert_eq!(tracked_descendants(&map, b"some/nested"), 1); - assert_eq!(tracked_descendants(&map, b"some/other"), 1); - assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); - - map.set_untracked(p(b"some/nested/file"))?; - assert_eq!(map.len(), 5); - assert_eq!(tracked_descendants(&map, b"some"), 4); - assert_eq!(tracked_descendants(&map, b"some/other"), 1); - assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); - assert_does_not_exist(&map, b"some_nested"); - - map.set_untracked(p(b"some/other/nested/path"))?; - assert_eq!(map.len(), 4); - assert_eq!(tracked_descendants(&map, b"some"), 3); - assert_does_not_exist(&map, b"some/other"); - - map.set_untracked(p(b"root_file"))?; - assert_eq!(map.len(), 3); - assert_eq!(tracked_descendants(&map, b"some"), 3); - assert_does_not_exist(&map, b"root_file"); - - map.set_untracked(p(b"some/file"))?; - assert_eq!(map.len(), 2); - assert_eq!(tracked_descendants(&map, b"some"), 2); - assert_does_not_exist(&map, b"some/file"); - - map.set_untracked(p(b"some/file2"))?; - assert_eq!(map.len(), 1); - assert_eq!(tracked_descendants(&map, b"some"), 1); - assert_does_not_exist(&map, b"some/file2"); - - map.set_untracked(p(b"some/file3"))?; - assert_eq!(map.len(), 0); - assert_does_not_exist(&map, b"some/file3"); - - Ok(()) - } - - /// Check with a mix of tracked and non-tracked items - #[test] - fn test_tracked_descendants_different() -> Result<(), DirstateError> { - let mut map = OwningDirstateMap::new_empty(vec![], None); - - // A file that was just added - map.set_tracked(p(b"some/nested/path"))?; - // This has no information, the dirstate should ignore it - let reset = DirstateEntryReset { - filename: p(b"some/file"), - wc_tracked: false, - p1_tracked: false, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: 
false, - }; - map.reset_state(reset)?; - assert_does_not_exist(&map, b"some/file"); - - // A file that was removed - let reset = DirstateEntryReset { - filename: p(b"some/nested/file"), - wc_tracked: false, - p1_tracked: true, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - assert!(!map.get(p(b"some/nested/file"))?.unwrap().tracked()); - // Only present in p2 - let reset = DirstateEntryReset { - filename: p(b"some/file3"), - wc_tracked: false, - p1_tracked: false, - p2_info: true, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - assert!(!map.get(p(b"some/file3"))?.unwrap().tracked()); - // A file that was merged - let reset = DirstateEntryReset { - filename: p(b"root_file"), - wc_tracked: true, - p1_tracked: true, - p2_info: true, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - assert!(map.get(p(b"root_file"))?.unwrap().tracked()); - // A file that is added, with info from p2 - // XXX is that actually possible? 
- let reset = DirstateEntryReset { - filename: p(b"some/file2"), - wc_tracked: true, - p1_tracked: false, - p2_info: true, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - assert!(map.get(p(b"some/file2"))?.unwrap().tracked()); - // A clean file - // One layer without any files to test deletion cascade - let reset = DirstateEntryReset { - filename: p(b"some/other/nested/path"), - wc_tracked: true, - p1_tracked: true, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - assert!(map.get(p(b"some/other/nested/path"))?.unwrap().tracked()); - - assert_eq!(map.len(), 6); - assert_eq!(tracked_descendants(&map, b"some"), 3); - assert_eq!(descendants_with_an_entry(&map, b"some"), 5); - assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1); - assert_eq!(tracked_descendants(&map, b"some/other/nested/path"), 0); - assert_eq!( - descendants_with_an_entry(&map, b"some/other/nested/path"), - 0 - ); - assert_eq!(tracked_descendants(&map, b"some/nested"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2); - - // might as well check this - map.set_untracked(p(b"path/does/not/exist"))?; - assert_eq!(map.len(), 6); - - map.set_untracked(p(b"some/other/nested/path"))?; - // It is set untracked but not deleted since it held other information - assert_eq!(map.len(), 6); - assert_eq!(tracked_descendants(&map, b"some"), 2); - assert_eq!(descendants_with_an_entry(&map, b"some"), 5); - assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1); - assert_eq!(tracked_descendants(&map, b"some/nested"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2); - - map.set_untracked(p(b"some/nested/path"))?; - // It is set untracked *and* deleted since it was only 
added - assert_eq!(map.len(), 5); - assert_eq!(tracked_descendants(&map, b"some"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some"), 4); - assert_eq!(tracked_descendants(&map, b"some/nested"), 0); - assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 1); - assert_does_not_exist(&map, b"some/nested/path"); - - map.set_untracked(p(b"root_file"))?; - // Untracked but not deleted - assert_eq!(map.len(), 5); - assert!(map.get(p(b"root_file"))?.is_some()); - - map.set_untracked(p(b"some/file2"))?; - assert_eq!(map.len(), 5); - assert_eq!(tracked_descendants(&map, b"some"), 0); - assert!(map.get(p(b"some/file2"))?.is_some()); - - map.set_untracked(p(b"some/file3"))?; - assert_eq!(map.len(), 5); - assert_eq!(tracked_descendants(&map, b"some"), 0); - assert!(map.get(p(b"some/file3"))?.is_some()); - - Ok(()) - } - - /// Check that copies counter is correctly updated - #[test] - fn test_copy_source() -> Result<(), DirstateError> { - let mut map = OwningDirstateMap::new_empty(vec![], None); - - // Clean file - let reset = DirstateEntryReset { - filename: p(b"files/clean"), - wc_tracked: true, - p1_tracked: true, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - // Merged file - let reset = DirstateEntryReset { - filename: p(b"files/from_p2"), - wc_tracked: true, - p1_tracked: true, - p2_info: true, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - // Removed file - let reset = DirstateEntryReset { - filename: p(b"removed"), - wc_tracked: false, - p1_tracked: true, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - // Added file - let reset = DirstateEntryReset { - filename: p(b"files/added"), - wc_tracked: true, - p1_tracked: false, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, 
- }; - map.reset_state(reset)?; - // Add copy - map.copy_map_insert(p(b"files/clean"), p(b"clean_copy_source"))?; - assert_eq!(map.copy_map_len(), 1); - - // Copy override - map.copy_map_insert(p(b"files/clean"), p(b"other_clean_copy_source"))?; - assert_eq!(map.copy_map_len(), 1); - - // Multiple copies - map.copy_map_insert(p(b"removed"), p(b"removed_copy_source"))?; - assert_eq!(map.copy_map_len(), 2); - - map.copy_map_insert(p(b"files/added"), p(b"added_copy_source"))?; - assert_eq!(map.copy_map_len(), 3); - - // Added, so the entry is completely removed - map.set_untracked(p(b"files/added"))?; - assert_does_not_exist(&map, b"files/added"); - assert_eq!(map.copy_map_len(), 2); - - // Removed, so the entry is kept around, so is its copy - map.set_untracked(p(b"removed"))?; - assert!(map.get(p(b"removed"))?.is_some()); - assert_eq!(map.copy_map_len(), 2); - - // Clean, so the entry is kept around, but not its copy - map.set_untracked(p(b"files/clean"))?; - assert!(map.get(p(b"files/clean"))?.is_some()); - assert_eq!(map.copy_map_len(), 1); - - map.copy_map_insert(p(b"files/from_p2"), p(b"from_p2_copy_source"))?; - assert_eq!(map.copy_map_len(), 2); - - // Info from p2, so its copy source info is kept around - map.set_untracked(p(b"files/from_p2"))?; - assert!(map.get(p(b"files/from_p2"))?.is_some()); - assert_eq!(map.copy_map_len(), 2); - - Ok(()) - } - - /// Test with "on disk" data. For the sake of this test, the "on disk" data - /// does not actually come from the disk, but it's opaque to the code being - /// tested. 
- #[test] - fn test_on_disk() -> Result<(), DirstateError> { - // First let's create some data to put "on disk" - let mut map = OwningDirstateMap::new_empty(vec![], None); - - // A file that was just added - map.set_tracked(p(b"some/nested/added"))?; - map.copy_map_insert(p(b"some/nested/added"), p(b"added_copy_source"))?; - - // A file that was removed - let reset = DirstateEntryReset { - filename: p(b"some/nested/removed"), - wc_tracked: false, - p1_tracked: true, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - // Only present in p2 - let reset = DirstateEntryReset { - filename: p(b"other/p2_info_only"), - wc_tracked: false, - p1_tracked: false, - p2_info: true, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - map.copy_map_insert( - p(b"other/p2_info_only"), - p(b"other/p2_info_copy_source"), - )?; - // A file that was merged - let reset = DirstateEntryReset { - filename: p(b"merged"), - wc_tracked: true, - p1_tracked: true, - p2_info: true, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - // A file that is added, with info from p2 - // XXX is that actually possible? 
- let reset = DirstateEntryReset { - filename: p(b"other/added_with_p2"), - wc_tracked: true, - p1_tracked: false, - p2_info: true, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - // One layer without any files to test deletion cascade - // A clean file - let reset = DirstateEntryReset { - filename: p(b"some/other/nested/clean"), - wc_tracked: true, - p1_tracked: true, - p2_info: false, - has_meaningful_mtime: false, - parent_file_data_opt: None, - from_empty: false, - }; - map.reset_state(reset)?; - - let (packed, metadata, _should_append, _old_data_size) = - map.pack_v2(DirstateMapWriteMode::ForceNewDataFile)?; - let packed_len = packed.len(); - assert!(packed_len > 0); - - // Recreate "from disk" - let mut map = OwningDirstateMap::new_v2( - packed, - packed_len, - metadata.as_bytes(), - vec![], - None, - )?; - - // Check that everything is accounted for - assert!(map.contains_key(p(b"some/nested/added"))?); - assert!(map.contains_key(p(b"some/nested/removed"))?); - assert!(map.contains_key(p(b"merged"))?); - assert!(map.contains_key(p(b"other/p2_info_only"))?); - assert!(map.contains_key(p(b"other/added_with_p2"))?); - assert!(map.contains_key(p(b"some/other/nested/clean"))?); - assert_eq!( - map.copy_map_get(p(b"some/nested/added"))?, - Some(p(b"added_copy_source")) - ); - assert_eq!( - map.copy_map_get(p(b"other/p2_info_only"))?, - Some(p(b"other/p2_info_copy_source")) - ); - assert_eq!(tracked_descendants(&map, b"some"), 2); - assert_eq!(descendants_with_an_entry(&map, b"some"), 3); - assert_eq!(tracked_descendants(&map, b"other"), 1); - assert_eq!(descendants_with_an_entry(&map, b"other"), 2); - assert_eq!(tracked_descendants(&map, b"some/other"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some/other"), 1); - assert_eq!(tracked_descendants(&map, b"some/other/nested"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some/other/nested"), 1); - assert_eq!(tracked_descendants(&map, 
b"some/nested"), 1); - assert_eq!(descendants_with_an_entry(&map, b"some/nested"), 2); - assert_eq!(map.len(), 6); - assert_eq!(map.get_map().unreachable_bytes, 0); - assert_eq!(map.copy_map_len(), 2); - - // Shouldn't change anything since it's already not tracked - map.set_untracked(p(b"some/nested/removed"))?; - assert_eq!(map.get_map().unreachable_bytes, 0); - - if let ChildNodes::InMemory(_) = map.get_map().root { - panic!("root should not have been mutated") - } - // We haven't mutated enough (nothing, actually), we should still be in - // the append strategy - assert!(map.get_map().write_should_append()); - - // But this mutates the structure, so there should be unreachable_bytes - assert!(map.set_untracked(p(b"some/nested/added"))?); - let unreachable_bytes = map.get_map().unreachable_bytes; - assert!(unreachable_bytes > 0); - - if let ChildNodes::OnDisk(_) = map.get_map().root { - panic!("root should have been mutated") - } - - // This should not mutate the structure either, since `root` has - // already been mutated along with its direct children. 
- map.set_untracked(p(b"merged"))?; - assert_eq!(map.get_map().unreachable_bytes, unreachable_bytes); - - if let NodeRef::InMemory(_, _) = - map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() - { - panic!("'other/added_with_p2' should not have been mutated") - } - // But this should, since it's in a different path - // than `some/nested/add` - map.set_untracked(p(b"other/added_with_p2"))?; - assert!(map.get_map().unreachable_bytes > unreachable_bytes); - - if let NodeRef::OnDisk(_) = - map.get_map().get_node(p(b"other/added_with_p2"))?.unwrap() - { - panic!("'other/added_with_p2' should have been mutated") - } - - // We have rewritten most of the tree, we should create a new file - assert!(!map.get_map().write_should_append()); - - Ok(()) - } -} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate_tree/on_disk.rs --- a/rust/hg-core/src/dirstate_tree/on_disk.rs Mon Nov 04 10:38:17 2024 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,940 +0,0 @@ -//! The "version 2" disk representation of the dirstate -//! -//! See `mercurial/helptext/internals/dirstate-v2.txt` - -use crate::dirstate::{DirstateV2Data, TruncatedTimestamp}; -use crate::dirstate_tree::dirstate_map::DirstateVersion; -use crate::dirstate_tree::dirstate_map::{ - self, DirstateMap, DirstateMapWriteMode, NodeRef, -}; -use crate::dirstate_tree::path_with_basename::WithBasename; -use crate::errors::{HgError, IoResultExt}; -use crate::repo::Repo; -use crate::requirements::DIRSTATE_TRACKED_HINT_V1; -use crate::utils::hg_path::HgPath; -use crate::DirstateEntry; -use crate::DirstateError; -use crate::DirstateParents; -use bitflags::bitflags; -use bytes_cast::unaligned::{U16Be, U32Be}; -use bytes_cast::BytesCast; -use format_bytes::format_bytes; -use rand::Rng; -use std::borrow::Cow; -use std::fmt::Write; -use uuid::Uuid; - -use super::dirstate_map::DirstateIdentity; - -/// Added at the start of `.hg/dirstate` when the "v2" format is used. 
-/// This a redundant sanity check more than an actual "magic number" since -/// `.hg/requires` already governs which format should be used. -pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; - -/// Keep space for 256-bit hashes -const STORED_NODE_ID_BYTES: usize = 32; - -/// … even though only 160 bits are used for now, with SHA-1 -const USED_NODE_ID_BYTES: usize = 20; - -pub(super) const IGNORE_PATTERNS_HASH_LEN: usize = 20; -pub(super) type IgnorePatternsHash = [u8; IGNORE_PATTERNS_HASH_LEN]; - -/// Must match constants of the same names in `mercurial/dirstateutils/v2.py` -const TREE_METADATA_SIZE: usize = 44; -const NODE_SIZE: usize = 44; - -/// Make sure that size-affecting changes are made knowingly -#[allow(unused)] -fn static_assert_size_of() { - let _ = std::mem::transmute::; - let _ = std::mem::transmute::; - let _ = std::mem::transmute::; -} - -// Must match `HEADER` in `mercurial/dirstateutils/docket.py` -#[derive(BytesCast)] -#[repr(C)] -struct DocketHeader { - marker: [u8; V2_FORMAT_MARKER.len()], - parent_1: [u8; STORED_NODE_ID_BYTES], - parent_2: [u8; STORED_NODE_ID_BYTES], - - metadata: TreeMetadata, - - /// Counted in bytes - data_size: Size, - - uuid_size: u8, -} - -pub struct Docket<'on_disk> { - header: &'on_disk DocketHeader, - pub uuid: &'on_disk [u8], -} - -/// Fields are documented in the *Tree metadata in the docket file* -/// section of `mercurial/helptext/internals/dirstate-v2.txt` -#[derive(BytesCast)] -#[repr(C)] -pub struct TreeMetadata { - root_nodes: ChildNodes, - nodes_with_entry_count: Size, - nodes_with_copy_source_count: Size, - unreachable_bytes: Size, - unused: [u8; 4], - - /// See *Optional hash of ignore patterns* section of - /// `mercurial/helptext/internals/dirstate-v2.txt` - ignore_patterns_hash: IgnorePatternsHash, -} - -/// Fields are documented in the *The data file format* -/// section of `mercurial/helptext/internals/dirstate-v2.txt` -#[derive(BytesCast, Debug)] -#[repr(C)] -pub(super) struct Node { - 
full_path: PathSlice, - - /// In bytes from `self.full_path.start` - base_name_start: PathSize, - - copy_source: OptPathSlice, - children: ChildNodes, - pub(super) descendants_with_entry_count: Size, - pub(super) tracked_descendants_count: Size, - flags: U16Be, - size: U32Be, - mtime: PackedTruncatedTimestamp, -} - -bitflags! { - #[repr(C)] - struct Flags: u16 { - const WDIR_TRACKED = 1 << 0; - const P1_TRACKED = 1 << 1; - const P2_INFO = 1 << 2; - const MODE_EXEC_PERM = 1 << 3; - const MODE_IS_SYMLINK = 1 << 4; - const HAS_FALLBACK_EXEC = 1 << 5; - const FALLBACK_EXEC = 1 << 6; - const HAS_FALLBACK_SYMLINK = 1 << 7; - const FALLBACK_SYMLINK = 1 << 8; - const EXPECTED_STATE_IS_MODIFIED = 1 << 9; - const HAS_MODE_AND_SIZE = 1 <<10; - const HAS_MTIME = 1 <<11; - const MTIME_SECOND_AMBIGUOUS = 1 << 12; - const DIRECTORY = 1 <<13; - const ALL_UNKNOWN_RECORDED = 1 <<14; - const ALL_IGNORED_RECORDED = 1 <<15; - } -} - -/// Duration since the Unix epoch -#[derive(BytesCast, Copy, Clone, Debug)] -#[repr(C)] -struct PackedTruncatedTimestamp { - truncated_seconds: U32Be, - nanoseconds: U32Be, -} - -/// Counted in bytes from the start of the file -/// -/// NOTE: not supporting `.hg/dirstate` files larger than 4 GiB. -type Offset = U32Be; - -/// Counted in number of items -/// -/// NOTE: we choose not to support counting more than 4 billion nodes anywhere. -type Size = U32Be; - -/// Counted in bytes -/// -/// NOTE: we choose not to support file names/paths longer than 64 KiB. -type PathSize = U16Be; - -/// A contiguous sequence of `len` times `Node`, representing the child nodes -/// of either some other node or of the repository root. -/// -/// Always sorted by ascending `full_path`, to allow binary search. -/// Since nodes with the same parent nodes also have the same parent path, -/// only the `base_name`s need to be compared during binary search. 
-#[derive(BytesCast, Copy, Clone, Debug)] -#[repr(C)] -struct ChildNodes { - start: Offset, - len: Size, -} - -/// A `HgPath` of `len` bytes -#[derive(BytesCast, Copy, Clone, Debug)] -#[repr(C)] -struct PathSlice { - start: Offset, - len: PathSize, -} - -/// Either nothing if `start == 0`, or a `HgPath` of `len` bytes -type OptPathSlice = PathSlice; - -/// Unexpected file format found in `.hg/dirstate` with the "v2" format. -/// -/// This should only happen if Mercurial is buggy or a repository is corrupted. -#[derive(Debug)] -pub struct DirstateV2ParseError { - message: String, -} - -impl DirstateV2ParseError { - pub fn new>(message: S) -> Self { - Self { - message: message.into(), - } - } -} - -impl From for HgError { - fn from(e: DirstateV2ParseError) -> Self { - HgError::corrupted(format!("dirstate-v2 parse error: {}", e.message)) - } -} - -impl From for crate::DirstateError { - fn from(error: DirstateV2ParseError) -> Self { - HgError::from(error).into() - } -} - -impl TreeMetadata { - pub fn as_bytes(&self) -> &[u8] { - BytesCast::as_bytes(self) - } -} - -impl<'on_disk> Docket<'on_disk> { - /// Generate the identifier for a new data file - /// - /// TODO: support the `HGTEST_UUIDFILE` environment variable. - /// See `mercurial/revlogutils/docket.py` - pub fn new_uid() -> String { - const ID_LENGTH: usize = 8; - let mut id = String::with_capacity(ID_LENGTH); - let mut rng = rand::thread_rng(); - for _ in 0..ID_LENGTH { - // One random hexadecimal digit. - // `unwrap` never panics because `impl Write for String` - // never returns an error. 
- write!(&mut id, "{:x}", rng.gen_range(0..16)).unwrap(); - } - id - } - - pub fn serialize( - parents: DirstateParents, - tree_metadata: TreeMetadata, - data_size: u64, - uuid: &[u8], - ) -> Result, std::num::TryFromIntError> { - let header = DocketHeader { - marker: *V2_FORMAT_MARKER, - parent_1: parents.p1.pad_to_256_bits(), - parent_2: parents.p2.pad_to_256_bits(), - metadata: tree_metadata, - data_size: u32::try_from(data_size)?.into(), - uuid_size: uuid.len().try_into()?, - }; - let header = header.as_bytes(); - let mut docket = Vec::with_capacity(header.len() + uuid.len()); - docket.extend_from_slice(header); - docket.extend_from_slice(uuid); - Ok(docket) - } - - pub fn parents(&self) -> DirstateParents { - use crate::Node; - let p1 = Node::try_from(&self.header.parent_1[..USED_NODE_ID_BYTES]) - .unwrap(); - let p2 = Node::try_from(&self.header.parent_2[..USED_NODE_ID_BYTES]) - .unwrap(); - DirstateParents { p1, p2 } - } - - pub fn tree_metadata(&self) -> &[u8] { - self.header.metadata.as_bytes() - } - - pub fn data_size(&self) -> usize { - // This `unwrap` could only panic on a 16-bit CPU - self.header.data_size.get().try_into().unwrap() - } - - pub fn data_filename(&self) -> String { - String::from_utf8(format_bytes!(b"dirstate.{}", self.uuid)).unwrap() - } -} - -pub fn read_docket( - on_disk: &[u8], -) -> Result, DirstateV2ParseError> { - let (header, uuid) = DocketHeader::from_bytes(on_disk).map_err(|e| { - DirstateV2ParseError::new(format!("when reading docket, {}", e)) - })?; - let uuid_size = header.uuid_size as usize; - if header.marker == *V2_FORMAT_MARKER && uuid.len() == uuid_size { - Ok(Docket { header, uuid }) - } else { - Err(DirstateV2ParseError::new( - "invalid format marker or uuid size", - )) - } -} - -pub(super) fn read<'on_disk>( - on_disk: &'on_disk [u8], - metadata: &[u8], - uuid: Vec, - identity: Option, -) -> Result, DirstateV2ParseError> { - if on_disk.is_empty() { - let mut map = DirstateMap::empty(on_disk); - map.identity = 
identity; - map.old_uuid = Some(uuid); - map.dirstate_version = DirstateVersion::V2; - return Ok(map); - } - let (meta, _) = TreeMetadata::from_bytes(metadata).map_err(|e| { - DirstateV2ParseError::new(format!("when parsing tree metadata, {}", e)) - })?; - let dirstate_map = DirstateMap { - on_disk, - root: dirstate_map::ChildNodes::OnDisk( - read_nodes(on_disk, meta.root_nodes).map_err(|mut e| { - e.message = format!("{}, when reading root notes", e.message); - e - })?, - ), - nodes_with_entry_count: meta.nodes_with_entry_count.get(), - nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(), - ignore_patterns_hash: meta.ignore_patterns_hash, - unreachable_bytes: meta.unreachable_bytes.get(), - old_data_size: on_disk.len(), - old_uuid: Some(uuid), - identity, - dirstate_version: DirstateVersion::V2, - write_mode: DirstateMapWriteMode::Auto, - use_tracked_hint: false, - }; - Ok(dirstate_map) -} - -impl Node { - pub(super) fn full_path<'on_disk>( - &self, - on_disk: &'on_disk [u8], - ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { - read_hg_path(on_disk, self.full_path) - } - - pub(super) fn base_name_start( - &self, - ) -> Result { - let start = self.base_name_start.get(); - if start < self.full_path.len.get() { - let start = usize::from(start); - Ok(start) - } else { - Err(DirstateV2ParseError::new("not enough bytes for base name")) - } - } - - pub(super) fn base_name<'on_disk>( - &self, - on_disk: &'on_disk [u8], - ) -> Result<&'on_disk HgPath, DirstateV2ParseError> { - let full_path = self.full_path(on_disk)?; - let base_name_start = self.base_name_start()?; - Ok(HgPath::new(&full_path.as_bytes()[base_name_start..])) - } - - pub(super) fn path<'on_disk>( - &self, - on_disk: &'on_disk [u8], - ) -> Result, DirstateV2ParseError> { - Ok(WithBasename::from_raw_parts( - Cow::Borrowed(self.full_path(on_disk)?), - self.base_name_start()?, - )) - } - - pub(super) fn has_copy_source(&self) -> bool { - self.copy_source.start.get() != 0 - } - - 
pub(super) fn copy_source<'on_disk>( - &self, - on_disk: &'on_disk [u8], - ) -> Result, DirstateV2ParseError> { - Ok(if self.has_copy_source() { - Some(read_hg_path(on_disk, self.copy_source)?) - } else { - None - }) - } - - fn flags(&self) -> Flags { - Flags::from_bits_truncate(self.flags.get()) - } - - fn has_entry(&self) -> bool { - self.flags().intersects( - Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO, - ) - } - - pub(super) fn node_data( - &self, - ) -> Result { - if self.has_entry() { - Ok(dirstate_map::NodeData::Entry(self.assume_entry()?)) - } else if let Some(mtime) = self.cached_directory_mtime()? { - Ok(dirstate_map::NodeData::CachedDirectory { mtime }) - } else { - Ok(dirstate_map::NodeData::None) - } - } - - pub(super) fn cached_directory_mtime( - &self, - ) -> Result, DirstateV2ParseError> { - // For now we do not have code to handle the absence of - // ALL_UNKNOWN_RECORDED, so we ignore the mtime if the flag is - // unset. - if self.flags().contains(Flags::DIRECTORY) - && self.flags().contains(Flags::HAS_MTIME) - && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED) - { - Ok(Some(self.mtime()?)) - } else { - Ok(None) - } - } - - fn synthesize_unix_mode(&self) -> u32 { - // Some platforms' libc don't have the same type (MacOS uses i32 here) - #[allow(clippy::unnecessary_cast)] - let file_type = if self.flags().contains(Flags::MODE_IS_SYMLINK) { - libc::S_IFLNK as u32 - } else { - libc::S_IFREG as u32 - }; - let permissions = if self.flags().contains(Flags::MODE_EXEC_PERM) { - 0o755 - } else { - 0o644 - }; - file_type | permissions - } - - fn mtime(&self) -> Result { - let mut m: TruncatedTimestamp = self.mtime.try_into()?; - if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) { - m.second_ambiguous = true; - } - Ok(m) - } - - fn assume_entry(&self) -> Result { - // TODO: convert through raw bits instead? 
- let wc_tracked = self.flags().contains(Flags::WDIR_TRACKED); - let p1_tracked = self.flags().contains(Flags::P1_TRACKED); - let p2_info = self.flags().contains(Flags::P2_INFO); - let mode_size = if self.flags().contains(Flags::HAS_MODE_AND_SIZE) - && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) - { - Some((self.synthesize_unix_mode(), self.size.into())) - } else { - None - }; - let mtime = if self.flags().contains(Flags::HAS_MTIME) - && !self.flags().contains(Flags::DIRECTORY) - && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) - { - Some(self.mtime()?) - } else { - None - }; - let fallback_exec = if self.flags().contains(Flags::HAS_FALLBACK_EXEC) - { - Some(self.flags().contains(Flags::FALLBACK_EXEC)) - } else { - None - }; - let fallback_symlink = - if self.flags().contains(Flags::HAS_FALLBACK_SYMLINK) { - Some(self.flags().contains(Flags::FALLBACK_SYMLINK)) - } else { - None - }; - Ok(DirstateEntry::from_v2_data(DirstateV2Data { - wc_tracked, - p1_tracked, - p2_info, - mode_size, - mtime, - fallback_exec, - fallback_symlink, - })) - } - - pub(super) fn entry( - &self, - ) -> Result, DirstateV2ParseError> { - if self.has_entry() { - Ok(Some(self.assume_entry()?)) - } else { - Ok(None) - } - } - - pub(super) fn children<'on_disk>( - &self, - on_disk: &'on_disk [u8], - ) -> Result<&'on_disk [Node], DirstateV2ParseError> { - read_nodes(on_disk, self.children) - } - - pub(super) fn to_in_memory_node<'on_disk>( - &self, - on_disk: &'on_disk [u8], - ) -> Result, DirstateV2ParseError> { - Ok(dirstate_map::Node { - children: dirstate_map::ChildNodes::OnDisk( - self.children(on_disk)?, - ), - copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), - data: self.node_data()?, - descendants_with_entry_count: self - .descendants_with_entry_count - .get(), - tracked_descendants_count: self.tracked_descendants_count.get(), - }) - } - - fn from_dirstate_entry( - entry: &DirstateEntry, - ) -> (Flags, U32Be, PackedTruncatedTimestamp) { - let 
DirstateV2Data { - wc_tracked, - p1_tracked, - p2_info, - mode_size: mode_size_opt, - mtime: mtime_opt, - fallback_exec, - fallback_symlink, - } = entry.v2_data(); - // TODO: convert through raw flag bits instead? - let mut flags = Flags::empty(); - flags.set(Flags::WDIR_TRACKED, wc_tracked); - flags.set(Flags::P1_TRACKED, p1_tracked); - flags.set(Flags::P2_INFO, p2_info); - // Some platforms' libc don't have the same type (MacOS uses i32 here) - #[allow(clippy::unnecessary_cast)] - let size = if let Some((m, s)) = mode_size_opt { - let exec_perm = m & (libc::S_IXUSR as u32) != 0; - let is_symlink = m & (libc::S_IFMT as u32) == libc::S_IFLNK as u32; - flags.set(Flags::MODE_EXEC_PERM, exec_perm); - flags.set(Flags::MODE_IS_SYMLINK, is_symlink); - flags.insert(Flags::HAS_MODE_AND_SIZE); - s.into() - } else { - 0.into() - }; - let mtime = if let Some(m) = mtime_opt { - flags.insert(Flags::HAS_MTIME); - if m.second_ambiguous { - flags.insert(Flags::MTIME_SECOND_AMBIGUOUS); - }; - m.into() - } else { - PackedTruncatedTimestamp::null() - }; - if let Some(f_exec) = fallback_exec { - flags.insert(Flags::HAS_FALLBACK_EXEC); - if f_exec { - flags.insert(Flags::FALLBACK_EXEC); - } - } - if let Some(f_symlink) = fallback_symlink { - flags.insert(Flags::HAS_FALLBACK_SYMLINK); - if f_symlink { - flags.insert(Flags::FALLBACK_SYMLINK); - } - } - (flags, size, mtime) - } -} - -fn read_hg_path( - on_disk: &[u8], - slice: PathSlice, -) -> Result<&HgPath, DirstateV2ParseError> { - read_slice(on_disk, slice.start, slice.len.get()).map(HgPath::new) -} - -fn read_nodes( - on_disk: &[u8], - slice: ChildNodes, -) -> Result<&[Node], DirstateV2ParseError> { - read_slice(on_disk, slice.start, slice.len.get()) -} - -fn read_slice( - on_disk: &[u8], - start: Offset, - len: Len, -) -> Result<&[T], DirstateV2ParseError> -where - T: BytesCast, - Len: TryInto, -{ - // Either `usize::MAX` would result in "out of bounds" error since a single - // `&[u8]` cannot occupy the entire addess space. 
- let start = start.get().try_into().unwrap_or(usize::MAX); - let len = len.try_into().unwrap_or(usize::MAX); - let bytes = match on_disk.get(start..) { - Some(bytes) => bytes, - None => { - return Err(DirstateV2ParseError::new( - "not enough bytes from disk", - )) - } - }; - T::slice_from_bytes(bytes, len) - .map_err(|e| { - DirstateV2ParseError::new(format!("when reading a slice, {}", e)) - }) - .map(|(slice, _rest)| slice) -} - -/// Returns new data and metadata, together with whether that data should be -/// appended to the existing data file whose content is at -/// `dirstate_map.on_disk` (true), instead of written to a new data file -/// (false), and the previous size of data on disk. -pub(super) fn write( - dirstate_map: &DirstateMap, - write_mode: DirstateMapWriteMode, -) -> Result<(Vec, TreeMetadata, bool, usize), DirstateError> { - let append = match write_mode { - DirstateMapWriteMode::Auto => dirstate_map.write_should_append(), - DirstateMapWriteMode::ForceNewDataFile => false, - DirstateMapWriteMode::ForceAppend => true, - }; - if append { - log::trace!("appending to the dirstate data file"); - } else { - log::trace!("creating new dirstate data file"); - } - - // This ignores the space for paths, and for nodes without an entry. - // TODO: better estimate? Skip the `Vec` and write to a file directly? 
- let size_guess = std::mem::size_of::() - * dirstate_map.nodes_with_entry_count as usize; - - let mut writer = Writer { - dirstate_map, - append, - out: Vec::with_capacity(size_guess), - }; - - let root_nodes = dirstate_map.root.as_ref(); - for node in root_nodes.iter() { - // Catch some corruptions before we write to disk - let full_path = node.full_path(dirstate_map.on_disk)?; - let base_name = node.base_name(dirstate_map.on_disk)?; - if full_path != base_name { - let explanation = format!( - "Dirstate root node '{}' is not at the root", - full_path - ); - return Err(HgError::corrupted(explanation).into()); - } - } - let root_nodes = writer.write_nodes(root_nodes)?; - - let unreachable_bytes = if append { - dirstate_map.unreachable_bytes - } else { - 0 - }; - let meta = TreeMetadata { - root_nodes, - nodes_with_entry_count: dirstate_map.nodes_with_entry_count.into(), - nodes_with_copy_source_count: dirstate_map - .nodes_with_copy_source_count - .into(), - unreachable_bytes: unreachable_bytes.into(), - unused: [0; 4], - ignore_patterns_hash: dirstate_map.ignore_patterns_hash, - }; - Ok((writer.out, meta, append, dirstate_map.old_data_size)) -} - -struct Writer<'dmap, 'on_disk> { - dirstate_map: &'dmap DirstateMap<'on_disk>, - append: bool, - out: Vec, -} - -impl Writer<'_, '_> { - fn write_nodes( - &mut self, - nodes: dirstate_map::ChildNodesRef, - ) -> Result { - // Reuse already-written nodes if possible - if self.append { - if let dirstate_map::ChildNodesRef::OnDisk(nodes_slice) = nodes { - let start = self.on_disk_offset_of(nodes_slice).expect( - "dirstate-v2 OnDisk nodes not found within on_disk", - ); - let len = child_nodes_len_from_usize(nodes_slice.len()); - return Ok(ChildNodes { start, len }); - } - } - - // `dirstate_map::ChildNodes::InMemory` contains a `HashMap` which has - // undefined iteration order. Sort to enable binary search in the - // written file. 
- let nodes = nodes.sorted(); - let nodes_len = nodes.len(); - - // First accumulate serialized nodes in a `Vec` - let mut on_disk_nodes = Vec::with_capacity(nodes_len); - for node in nodes { - let children = node.children(self.dirstate_map.on_disk)?; - let full_path = node.full_path(self.dirstate_map.on_disk)?; - self.check_children(&children, full_path)?; - - let children = self.write_nodes(children)?; - let full_path = self.write_path(full_path.as_bytes()); - let copy_source = if let Some(source) = - node.copy_source(self.dirstate_map.on_disk)? - { - self.write_path(source.as_bytes()) - } else { - PathSlice { - start: 0.into(), - len: 0.into(), - } - }; - on_disk_nodes.push(match node { - NodeRef::InMemory(path, node) => { - let (flags, size, mtime) = match &node.data { - dirstate_map::NodeData::Entry(entry) => { - Node::from_dirstate_entry(entry) - } - dirstate_map::NodeData::CachedDirectory { mtime } => { - // we currently never set a mtime if unknown file - // are present. - // So if we have a mtime for a directory, we know - // they are no unknown - // files and we - // blindly set ALL_UNKNOWN_RECORDED. - // - // We never set ALL_IGNORED_RECORDED since we - // don't track that case - // currently. 
- let mut flags = Flags::DIRECTORY - | Flags::HAS_MTIME - | Flags::ALL_UNKNOWN_RECORDED; - if mtime.second_ambiguous { - flags.insert(Flags::MTIME_SECOND_AMBIGUOUS) - } - (flags, 0.into(), (*mtime).into()) - } - dirstate_map::NodeData::None => ( - Flags::DIRECTORY, - 0.into(), - PackedTruncatedTimestamp::null(), - ), - }; - Node { - children, - copy_source, - full_path, - base_name_start: u16::try_from(path.base_name_start()) - // Could only panic for paths over 64 KiB - .expect("dirstate-v2 path length overflow") - .into(), - descendants_with_entry_count: node - .descendants_with_entry_count - .into(), - tracked_descendants_count: node - .tracked_descendants_count - .into(), - flags: flags.bits().into(), - size, - mtime, - } - } - NodeRef::OnDisk(node) => Node { - children, - copy_source, - full_path, - ..*node - }, - }) - } - // … so we can write them contiguously, after writing everything else - // they refer to. - let start = self.current_offset(); - let len = child_nodes_len_from_usize(nodes_len); - self.out.extend(on_disk_nodes.as_bytes()); - Ok(ChildNodes { start, len }) - } - - /// Catch some dirstate corruptions before writing them to disk - fn check_children( - &mut self, - children: &dirstate_map::ChildNodesRef, - full_path: &HgPath, - ) -> Result<(), DirstateError> { - for child in children.iter() { - let child_full_path = - child.full_path(self.dirstate_map.on_disk)?; - - let prefix_length = child_full_path.len() - // remove the filename - - child.base_name(self.dirstate_map.on_disk)?.len() - // remove the slash - - 1; - - let child_prefix = &child_full_path.as_bytes()[..prefix_length]; - - if child_prefix != full_path.as_bytes() { - let explanation = format!( - "dirstate child node's path '{}' \ - does not start with its parent's path '{}'", - child_full_path, full_path, - ); - - return Err(HgError::corrupted(explanation).into()); - } - } - Ok(()) - } - - /// If the given slice of items is within `on_disk`, returns its offset - /// from the start of 
`on_disk`. - fn on_disk_offset_of(&self, slice: &[T]) -> Option - where - T: BytesCast, - { - fn address_range(slice: &[u8]) -> std::ops::RangeInclusive { - let start = slice.as_ptr() as usize; - let end = start + slice.len(); - start..=end - } - let slice_addresses = address_range(slice.as_bytes()); - let on_disk_addresses = address_range(self.dirstate_map.on_disk); - if on_disk_addresses.contains(slice_addresses.start()) - && on_disk_addresses.contains(slice_addresses.end()) - { - let offset = slice_addresses.start() - on_disk_addresses.start(); - Some(offset_from_usize(offset)) - } else { - None - } - } - - fn current_offset(&mut self) -> Offset { - let mut offset = self.out.len(); - if self.append { - offset += self.dirstate_map.on_disk.len() - } - offset_from_usize(offset) - } - - fn write_path(&mut self, slice: &[u8]) -> PathSlice { - let len = path_len_from_usize(slice.len()); - // Reuse an already-written path if possible - if self.append { - if let Some(start) = self.on_disk_offset_of(slice) { - return PathSlice { start, len }; - } - } - let start = self.current_offset(); - self.out.extend(slice.as_bytes()); - PathSlice { start, len } - } -} - -fn offset_from_usize(x: usize) -> Offset { - u32::try_from(x) - // Could only panic for a dirstate file larger than 4 GiB - .expect("dirstate-v2 offset overflow") - .into() -} - -fn child_nodes_len_from_usize(x: usize) -> Size { - u32::try_from(x) - // Could only panic with over 4 billion nodes - .expect("dirstate-v2 slice length overflow") - .into() -} - -fn path_len_from_usize(x: usize) -> PathSize { - u16::try_from(x) - // Could only panic for paths over 64 KiB - .expect("dirstate-v2 path length overflow") - .into() -} - -impl From for PackedTruncatedTimestamp { - fn from(timestamp: TruncatedTimestamp) -> Self { - Self { - truncated_seconds: timestamp.truncated_seconds().into(), - nanoseconds: timestamp.nanoseconds().into(), - } - } -} - -impl TryFrom for TruncatedTimestamp { - type Error = DirstateV2ParseError; 
- - fn try_from( - timestamp: PackedTruncatedTimestamp, - ) -> Result { - Self::from_already_truncated( - timestamp.truncated_seconds.get(), - timestamp.nanoseconds.get(), - false, - ) - } -} -impl PackedTruncatedTimestamp { - fn null() -> Self { - Self { - truncated_seconds: 0.into(), - nanoseconds: 0.into(), - } - } -} - -/// Write a new tracked key to disk. -/// See `format.use-dirstate-tracked-hint` config help for more details. -pub fn write_tracked_key(repo: &Repo) -> Result<(), HgError> { - // TODO move this to the dirstate itself once it grows a `dirty` flag and - // can reason about which context it needs to write this in. - // For now, only this fast-path needs to think about the tracked hint. - // Use [`crate::dirstate_tree::dirstate_map::DirstateMap:: - // use_tracked_hint`] instead of looking at the requirements once - // refactored. - if !repo.requirements().contains(DIRSTATE_TRACKED_HINT_V1) { - return Ok(()); - } - // TODO use `hg_vfs` once the `InnerRevlog` is in. - let path = repo - .working_directory_path() - .join(".hg/dirstate-tracked-hint"); - std::fs::write(&path, Uuid::new_v4().as_bytes()).when_writing_file(&path) -} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate_tree/owning.rs --- a/rust/hg-core/src/dirstate_tree/owning.rs Mon Nov 04 10:38:17 2024 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -use crate::{DirstateError, DirstateParents}; - -use super::dirstate_map::{DirstateIdentity, DirstateMap}; -use self_cell::self_cell; -use std::ops::Deref; - -self_cell!( - /// Keep a `DirstateMap<'owner>` next to the `owner` buffer that it - /// borrows. 
- pub struct OwningDirstateMap { - owner: Box + Send>, - #[covariant] - dependent: DirstateMap, - } -); - -impl OwningDirstateMap { - pub fn new_empty( - on_disk: OnDisk, - identity: Option, - ) -> Self - where - OnDisk: Deref + Send + 'static, - { - let on_disk = Box::new(on_disk); - - OwningDirstateMap::new(on_disk, |bytes| { - let mut empty = DirstateMap::empty(bytes); - empty.identity = identity; - empty - }) - } - - pub fn new_v1( - on_disk: OnDisk, - identity: Option, - ) -> Result<(Self, DirstateParents), DirstateError> - where - OnDisk: Deref + Send + 'static, - { - let on_disk = Box::new(on_disk); - let mut parents = DirstateParents::NULL; - - Ok(( - OwningDirstateMap::try_new(on_disk, |bytes| { - DirstateMap::new_v1(bytes, identity).map(|(dmap, p)| { - parents = p.unwrap_or(DirstateParents::NULL); - dmap - }) - })?, - parents, - )) - } - - pub fn new_v2( - on_disk: OnDisk, - data_size: usize, - metadata: &[u8], - uuid: Vec, - identity: Option, - ) -> Result - where - OnDisk: Deref + Send + 'static, - { - let on_disk = Box::new(on_disk); - - OwningDirstateMap::try_new(on_disk, |bytes| { - DirstateMap::new_v2(bytes, data_size, metadata, uuid, identity) - }) - } - - pub fn with_dmap_mut( - &mut self, - f: impl FnOnce(&mut DirstateMap) -> R, - ) -> R { - self.with_dependent_mut(|_owner, dmap| f(dmap)) - } - - pub fn get_map(&self) -> &DirstateMap { - self.borrow_dependent() - } - - pub fn on_disk(&self) -> &[u8] { - self.borrow_owner() - } - - pub fn old_uuid(&self) -> Option<&[u8]> { - self.get_map().old_uuid.as_deref() - } - - pub fn old_identity(&self) -> Option { - self.get_map().identity - } - - pub fn old_data_size(&self) -> usize { - self.get_map().old_data_size - } -} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate_tree/path_with_basename.rs --- a/rust/hg-core/src/dirstate_tree/path_with_basename.rs Mon Nov 04 10:38:17 2024 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,187 +0,0 @@ -use crate::utils::hg_path::HgPath; -use 
std::borrow::{Borrow, Cow}; - -/// Wraps `HgPath` or `HgPathBuf` to make it behave "as" its last path -/// component, a.k.a. its base name (as in Python’s `os.path.basename`), but -/// also allow recovering the full path. -/// -/// "Behaving as" means that equality and comparison consider only the base -/// name, and `std::borrow::Borrow` is implemented to return only the base -/// name. This allows using the base name as a map key while still being able -/// to recover the full path, in a single memory allocation. -#[derive(Debug)] -pub struct WithBasename { - full_path: T, - - /// The position after the last slash separator in `full_path`, or `0` - /// if there is no slash. - base_name_start: usize, -} - -impl WithBasename { - pub fn full_path(&self) -> &T { - &self.full_path - } -} - -fn find_base_name_start(full_path: &HgPath) -> usize { - if let Some(last_slash_position) = - full_path.as_bytes().iter().rposition(|&byte| byte == b'/') - { - last_slash_position + 1 - } else { - 0 - } -} - -impl> WithBasename { - pub fn new(full_path: T) -> Self { - Self { - base_name_start: find_base_name_start(full_path.as_ref()), - full_path, - } - } - - pub fn from_raw_parts(full_path: T, base_name_start: usize) -> Self { - debug_assert_eq!( - base_name_start, - find_base_name_start(full_path.as_ref()) - ); - Self { - base_name_start, - full_path, - } - } - - pub fn base_name(&self) -> &HgPath { - HgPath::new( - &self.full_path.as_ref().as_bytes()[self.base_name_start..], - ) - } - - pub fn base_name_start(&self) -> usize { - self.base_name_start - } -} - -impl> Borrow for WithBasename { - fn borrow(&self) -> &HgPath { - self.base_name() - } -} - -impl> std::hash::Hash for WithBasename { - fn hash(&self, hasher: &mut H) { - self.base_name().hash(hasher) - } -} - -impl + PartialEq> PartialEq for WithBasename { - fn eq(&self, other: &Self) -> bool { - self.base_name() == other.base_name() - } -} - -impl + Eq> Eq for WithBasename {} - -impl + PartialOrd> PartialOrd for 
WithBasename { - fn partial_cmp(&self, other: &Self) -> Option { - self.base_name().partial_cmp(other.base_name()) - } -} - -impl + Ord> Ord for WithBasename { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.base_name().cmp(other.base_name()) - } -} - -impl<'a> WithBasename<&'a HgPath> { - pub fn to_cow_borrowed(self) -> WithBasename> { - WithBasename { - full_path: Cow::Borrowed(self.full_path), - base_name_start: self.base_name_start, - } - } - - pub fn to_cow_owned<'b>(self) -> WithBasename> { - WithBasename { - full_path: Cow::Owned(self.full_path.to_owned()), - base_name_start: self.base_name_start, - } - } -} - -impl<'a> WithBasename<&'a HgPath> { - /// Returns an iterator of `WithBasename<&HgPath>` for the ancestor - /// directory paths of the given `path`, as well as `path` itself. - /// - /// For example, the full paths of inclusive ancestors of "a/b/c" are "a", - /// "a/b", and "a/b/c" in that order. - pub fn inclusive_ancestors_of( - path: &'a HgPath, - ) -> impl Iterator> { - let mut slash_positions = - path.as_bytes().iter().enumerate().filter_map(|(i, &byte)| { - if byte == b'/' { - Some(i) - } else { - None - } - }); - let mut opt_next_component_start = Some(0); - std::iter::from_fn(move || { - opt_next_component_start.take().map(|next_component_start| { - if let Some(slash_pos) = slash_positions.next() { - opt_next_component_start = Some(slash_pos + 1); - Self { - full_path: HgPath::new(&path.as_bytes()[..slash_pos]), - base_name_start: next_component_start, - } - } else { - // Not setting `opt_next_component_start` here: there will - // be no iteration after this one because `.take()` set it - // to `None`. 
- Self { - full_path: path, - base_name_start: next_component_start, - } - } - }) - }) - } -} - -#[test] -fn test() { - let a = WithBasename::new(HgPath::new("a").to_owned()); - assert_eq!(&**a.full_path(), HgPath::new(b"a")); - assert_eq!(a.base_name(), HgPath::new(b"a")); - - let cba = WithBasename::new(HgPath::new("c/b/a").to_owned()); - assert_eq!(&**cba.full_path(), HgPath::new(b"c/b/a")); - assert_eq!(cba.base_name(), HgPath::new(b"a")); - - assert_eq!(a, cba); - let borrowed: &HgPath = cba.borrow(); - assert_eq!(borrowed, HgPath::new("a")); -} - -#[test] -fn test_inclusive_ancestors() { - let mut iter = WithBasename::inclusive_ancestors_of(HgPath::new("a/bb/c")); - - let next = iter.next().unwrap(); - assert_eq!(*next.full_path(), HgPath::new("a")); - assert_eq!(next.base_name(), HgPath::new("a")); - - let next = iter.next().unwrap(); - assert_eq!(*next.full_path(), HgPath::new("a/bb")); - assert_eq!(next.base_name(), HgPath::new("bb")); - - let next = iter.next().unwrap(); - assert_eq!(*next.full_path(), HgPath::new("a/bb/c")); - assert_eq!(next.base_name(), HgPath::new("c")); - - assert!(iter.next().is_none()); -} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/dirstate_tree/status.rs --- a/rust/hg-core/src/dirstate_tree/status.rs Mon Nov 04 10:38:17 2024 +0100 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1036 +0,0 @@ -use crate::dirstate::entry::TruncatedTimestamp; -use crate::dirstate::status::IgnoreFnType; -use crate::dirstate::status::StatusPath; -use crate::dirstate_tree::dirstate_map::BorrowedPath; -use crate::dirstate_tree::dirstate_map::ChildNodesRef; -use crate::dirstate_tree::dirstate_map::DirstateMap; -use crate::dirstate_tree::dirstate_map::DirstateVersion; -use crate::dirstate_tree::dirstate_map::NodeRef; -use crate::dirstate_tree::on_disk::DirstateV2ParseError; -use crate::matchers::get_ignore_function; -use crate::matchers::{Matcher, VisitChildrenSet}; -use crate::utils::files::filesystem_now; -use 
crate::utils::files::get_bytes_from_os_string; -use crate::utils::files::get_bytes_from_path; -use crate::utils::files::get_path_from_bytes; -use crate::utils::hg_path::hg_path_to_path_buf; -use crate::utils::hg_path::HgPath; -use crate::BadMatch; -use crate::BadType; -use crate::DirstateStatus; -use crate::HgPathCow; -use crate::PatternFileWarning; -use crate::StatusError; -use crate::StatusOptions; -use once_cell::sync::OnceCell; -use rayon::prelude::*; -use sha1::{Digest, Sha1}; -use std::borrow::Cow; -use std::io; -use std::os::unix::prelude::FileTypeExt; -use std::path::Path; -use std::path::PathBuf; -use std::sync::Mutex; - -/// Returns the status of the working directory compared to its parent -/// changeset. -/// -/// This algorithm is based on traversing the filesystem tree (`fs` in function -/// and variable names) and dirstate tree at the same time. The core of this -/// traversal is the recursive `traverse_fs_directory_and_dirstate` function -/// and its use of `itertools::merge_join_by`. When reaching a path that only -/// exists in one of the two trees, depending on information requested by -/// `options` we may need to traverse the remaining subtree. -#[logging_timer::time("trace")] -pub fn status<'dirstate>( - dmap: &'dirstate mut DirstateMap, - matcher: &(dyn Matcher + Sync), - root_dir: PathBuf, - ignore_files: Vec, - options: StatusOptions, -) -> Result<(DirstateStatus<'dirstate>, Vec), StatusError> -{ - // Also cap for a Python caller of this function, but don't complain if - // the global threadpool has already been set since this code path is also - // being used by `rhg`, which calls this early. 
- let _ = crate::utils::cap_default_rayon_threads(); - - let (ignore_fn, warnings, patterns_changed): (IgnoreFnType, _, _) = - if options.list_ignored || options.list_unknown { - let (ignore_fn, warnings, changed) = match dmap.dirstate_version { - DirstateVersion::V1 => { - let (ignore_fn, warnings) = get_ignore_function( - ignore_files, - &root_dir, - &mut |_source, _pattern_bytes| {}, - )?; - (ignore_fn, warnings, None) - } - DirstateVersion::V2 => { - let mut hasher = Sha1::new(); - let (ignore_fn, warnings) = get_ignore_function( - ignore_files, - &root_dir, - &mut |source, pattern_bytes| { - // If inside the repo, use the relative version to - // make it deterministic inside tests. - // The performance hit should be negligible. - let source = source - .strip_prefix(&root_dir) - .unwrap_or(source); - let source = get_bytes_from_path(source); - - let mut subhasher = Sha1::new(); - subhasher.update(pattern_bytes); - let patterns_hash = subhasher.finalize(); - - hasher.update(source); - hasher.update(b" "); - hasher.update(patterns_hash); - hasher.update(b"\n"); - }, - )?; - let new_hash = *hasher.finalize().as_ref(); - let changed = new_hash != dmap.ignore_patterns_hash; - dmap.ignore_patterns_hash = new_hash; - (ignore_fn, warnings, Some(changed)) - } - }; - (ignore_fn, warnings, changed) - } else { - (Box::new(|&_| true), vec![], None) - }; - - let filesystem_time_at_status_start = - filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from); - - // If the repository is under the current directory, prefer using a - // relative path, so the kernel needs to traverse fewer directory in every - // call to `read_dir` or `symlink_metadata`. - // This is effective in the common case where the current directory is the - // repository root. - - // TODO: Better yet would be to use libc functions like `openat` and - // `fstatat` to remove such repeated traversals entirely, but the standard - // library does not provide APIs based on those. 
- // Maybe with a crate like https://crates.io/crates/openat instead? - let root_dir = if let Some(relative) = std::env::current_dir() - .ok() - .and_then(|cwd| root_dir.strip_prefix(cwd).ok()) - { - relative - } else { - &root_dir - }; - - let outcome = DirstateStatus { - filesystem_time_at_status_start, - ..Default::default() - }; - let common = StatusCommon { - dmap, - options, - matcher, - ignore_fn, - outcome: Mutex::new(outcome), - ignore_patterns_have_changed: patterns_changed, - new_cacheable_directories: Default::default(), - outdated_cached_directories: Default::default(), - filesystem_time_at_status_start, - }; - let is_at_repo_root = true; - let hg_path = &BorrowedPath::OnDisk(HgPath::new("")); - let has_ignored_ancestor = HasIgnoredAncestor::create(None, hg_path); - let root_cached_mtime = None; - // If the path we have for the repository root is a symlink, do follow it. - // (As opposed to symlinks within the working directory which are not - // followed, using `std::fs::symlink_metadata`.) - common.traverse_fs_directory_and_dirstate( - &has_ignored_ancestor, - dmap.root.as_ref(), - hg_path, - &DirEntry { - hg_path: Cow::Borrowed(HgPath::new(b"")), - fs_path: Cow::Borrowed(root_dir), - symlink_metadata: None, - file_type: FakeFileType::Directory, - }, - root_cached_mtime, - is_at_repo_root, - )?; - if let Some(file_set) = common.matcher.file_set() { - for file in file_set { - if !file.is_empty() && !dmap.has_node(file)? 
{ - let path = hg_path_to_path_buf(file)?; - if let io::Result::Err(error) = - root_dir.join(path).symlink_metadata() - { - common.io_error(error, file) - } - } - } - } - let mut outcome = common.outcome.into_inner().unwrap(); - let new_cacheable = common.new_cacheable_directories.into_inner().unwrap(); - let outdated = common.outdated_cached_directories.into_inner().unwrap(); - - outcome.dirty = common.ignore_patterns_have_changed == Some(true) - || !outdated.is_empty() - || (!new_cacheable.is_empty() - && dmap.dirstate_version == DirstateVersion::V2); - - // Remove outdated mtimes before adding new mtimes, in case a given - // directory is both - for path in &outdated { - dmap.clear_cached_mtime(path)?; - } - for (path, mtime) in &new_cacheable { - dmap.set_cached_mtime(path, *mtime)?; - } - - Ok((outcome, warnings)) -} - -/// Bag of random things needed by various parts of the algorithm. Reduces the -/// number of parameters passed to functions. -struct StatusCommon<'a, 'tree, 'on_disk: 'tree> { - dmap: &'tree DirstateMap<'on_disk>, - options: StatusOptions, - matcher: &'a (dyn Matcher + Sync), - ignore_fn: IgnoreFnType<'a>, - outcome: Mutex>, - /// New timestamps of directories to be used for caching their readdirs - new_cacheable_directories: - Mutex, TruncatedTimestamp)>>, - /// Used to invalidate the readdir cache of directories - outdated_cached_directories: Mutex>>, - - /// Whether ignore files like `.hgignore` have changed since the previous - /// time a `status()` call wrote their hash to the dirstate. `None` means - /// we don’t know as this run doesn’t list either ignored or uknown files - /// and therefore isn’t reading `.hgignore`. - ignore_patterns_have_changed: Option, - - /// The current time at the start of the `status()` algorithm, as measured - /// and possibly truncated by the filesystem. 
- filesystem_time_at_status_start: Option, -} - -enum Outcome { - Modified, - Added, - Removed, - Deleted, - Clean, - Ignored, - Unknown, - Unsure, -} - -/// Lazy computation of whether a given path has a hgignored -/// ancestor. -struct HasIgnoredAncestor<'a> { - /// `path` and `parent` constitute the inputs to the computation, - /// `cache` stores the outcome. - path: &'a HgPath, - parent: Option<&'a HasIgnoredAncestor<'a>>, - cache: OnceCell, -} - -impl<'a> HasIgnoredAncestor<'a> { - fn create( - parent: Option<&'a HasIgnoredAncestor<'a>>, - path: &'a HgPath, - ) -> HasIgnoredAncestor<'a> { - Self { - path, - parent, - cache: OnceCell::new(), - } - } - - fn force(&self, ignore_fn: &IgnoreFnType<'_>) -> bool { - match self.parent { - None => false, - Some(parent) => { - *(self.cache.get_or_init(|| { - parent.force(ignore_fn) || ignore_fn(self.path) - })) - } - } - } -} - -impl<'a, 'tree, 'on_disk> StatusCommon<'a, 'tree, 'on_disk> { - fn push_outcome( - &self, - which: Outcome, - dirstate_node: &NodeRef<'tree, 'on_disk>, - ) -> Result<(), DirstateV2ParseError> { - let path = dirstate_node - .full_path_borrowed(self.dmap.on_disk)? - .detach_from_tree(); - let copy_source = if self.options.list_copies { - dirstate_node - .copy_source_borrowed(self.dmap.on_disk)? 
- .map(|source| source.detach_from_tree()) - } else { - None - }; - self.push_outcome_common(which, path, copy_source); - Ok(()) - } - - fn push_outcome_without_copy_source( - &self, - which: Outcome, - path: &BorrowedPath<'_, 'on_disk>, - ) { - self.push_outcome_common(which, path.detach_from_tree(), None) - } - - fn push_outcome_common( - &self, - which: Outcome, - path: HgPathCow<'on_disk>, - copy_source: Option>, - ) { - let mut outcome = self.outcome.lock().unwrap(); - let vec = match which { - Outcome::Modified => &mut outcome.modified, - Outcome::Added => &mut outcome.added, - Outcome::Removed => &mut outcome.removed, - Outcome::Deleted => &mut outcome.deleted, - Outcome::Clean => &mut outcome.clean, - Outcome::Ignored => &mut outcome.ignored, - Outcome::Unknown => &mut outcome.unknown, - Outcome::Unsure => &mut outcome.unsure, - }; - vec.push(StatusPath { path, copy_source }); - } - - fn read_dir( - &self, - hg_path: &HgPath, - fs_path: &Path, - is_at_repo_root: bool, - ) -> Result, ()> { - DirEntry::read_dir(fs_path, is_at_repo_root) - .map_err(|error| self.io_error(error, hg_path)) - } - - fn io_error(&self, error: std::io::Error, hg_path: &HgPath) { - let errno = error.raw_os_error().expect("expected real OS error"); - self.outcome - .lock() - .unwrap() - .bad - .push((hg_path.to_owned().into(), BadMatch::OsError(errno))) - } - - fn check_for_outdated_directory_cache( - &self, - dirstate_node: &NodeRef<'tree, 'on_disk>, - ) -> Result { - if self.ignore_patterns_have_changed == Some(true) - && dirstate_node.cached_directory_mtime()?.is_some() - { - self.outdated_cached_directories.lock().unwrap().push( - dirstate_node - .full_path_borrowed(self.dmap.on_disk)? - .detach_from_tree(), - ); - return Ok(true); - } - Ok(false) - } - - /// If this returns true, we can get accurate results by only using - /// `symlink_metadata` for child nodes that exist in the dirstate and don’t - /// need to call `read_dir`. 
- fn can_skip_fs_readdir( - &self, - directory_entry: &DirEntry, - cached_directory_mtime: Option, - ) -> bool { - if !self.options.list_unknown && !self.options.list_ignored { - // All states that we care about listing have corresponding - // dirstate entries. - // This happens for example with `hg status -mard`. - return true; - } - if !self.options.list_ignored - && self.ignore_patterns_have_changed == Some(false) - { - if let Some(cached_mtime) = cached_directory_mtime { - // The dirstate contains a cached mtime for this directory, set - // by a previous run of the `status` algorithm which found this - // directory eligible for `read_dir` caching. - if let Ok(meta) = directory_entry.symlink_metadata() { - if cached_mtime - .likely_equal_to_mtime_of(&meta) - .unwrap_or(false) - { - // The mtime of that directory has not changed - // since then, which means that the results of - // `read_dir` should also be unchanged. - return true; - } - } - } - } - false - } - - fn should_visit(set: &VisitChildrenSet, basename: &HgPath) -> bool { - match set { - VisitChildrenSet::This | VisitChildrenSet::Recursive => true, - VisitChildrenSet::Empty => false, - VisitChildrenSet::Set(children_to_visit) => { - children_to_visit.contains(basename) - } - } - } - - /// Returns whether all child entries of the filesystem directory have a - /// corresponding dirstate node or are ignored. 
- fn traverse_fs_directory_and_dirstate<'ancestor>( - &self, - has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>, - dirstate_nodes: ChildNodesRef<'tree, 'on_disk>, - directory_hg_path: &BorrowedPath<'tree, 'on_disk>, - directory_entry: &DirEntry, - cached_directory_mtime: Option, - is_at_repo_root: bool, - ) -> Result { - let children_set = self.matcher.visit_children_set(directory_hg_path); - if let VisitChildrenSet::Empty = children_set { - return Ok(false); - } - if self.can_skip_fs_readdir(directory_entry, cached_directory_mtime) { - dirstate_nodes - .par_iter() - .map(|dirstate_node| { - let fs_path = &directory_entry.fs_path; - let basename = - dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(); - let fs_path = fs_path.join(get_path_from_bytes(basename)); - if !Self::should_visit( - &children_set, - HgPath::new(basename), - ) { - return Ok(()); - } - match std::fs::symlink_metadata(&fs_path) { - Ok(fs_metadata) => { - let file_type = fs_metadata.file_type().into(); - let entry = DirEntry { - hg_path: Cow::Borrowed( - dirstate_node - .full_path(self.dmap.on_disk)?, - ), - fs_path: Cow::Borrowed(&fs_path), - symlink_metadata: Some(fs_metadata), - file_type, - }; - self.traverse_fs_and_dirstate( - &entry, - dirstate_node, - has_ignored_ancestor, - ) - } - Err(e) if e.kind() == std::io::ErrorKind::NotFound => { - self.traverse_dirstate_only(dirstate_node) - } - Err(error) => { - let hg_path = - dirstate_node.full_path(self.dmap.on_disk)?; - self.io_error(error, hg_path); - Ok(()) - } - } - }) - .collect::>()?; - - // We don’t know, so conservatively say this isn’t the case - let children_all_have_dirstate_node_or_are_ignored = false; - - return Ok(children_all_have_dirstate_node_or_are_ignored); - } - - let readdir_succeeded; - let mut fs_entries = if let Ok(entries) = self.read_dir( - directory_hg_path, - &directory_entry.fs_path, - is_at_repo_root, - ) { - readdir_succeeded = true; - entries - } else { - // Treat an unreadable directory 
(typically because of insufficient - // permissions) like an empty directory. `self.read_dir` has - // already called `self.io_error` so a warning will be emitted. - // We still need to remember that there was an error so that we - // know not to cache this result. - readdir_succeeded = false; - Vec::new() - }; - - // `merge_join_by` requires both its input iterators to be sorted: - - let dirstate_nodes = dirstate_nodes.sorted(); - // `sort_unstable_by_key` doesn’t allow keys borrowing from the value: - // https://github.com/rust-lang/rust/issues/34162 - fs_entries.sort_unstable_by(|e1, e2| e1.hg_path.cmp(&e2.hg_path)); - - // Propagate here any error that would happen inside the comparison - // callback below - for dirstate_node in &dirstate_nodes { - dirstate_node.base_name(self.dmap.on_disk)?; - } - itertools::merge_join_by( - dirstate_nodes, - &fs_entries, - |dirstate_node, fs_entry| { - // This `unwrap` never panics because we already propagated - // those errors above - dirstate_node - .base_name(self.dmap.on_disk) - .unwrap() - .cmp(&fs_entry.hg_path) - }, - ) - .par_bridge() - .map(|pair| { - use itertools::EitherOrBoth::*; - let basename = match &pair { - Left(dirstate_node) | Both(dirstate_node, _) => HgPath::new( - dirstate_node.base_name(self.dmap.on_disk)?.as_bytes(), - ), - Right(fs_entry) => &fs_entry.hg_path, - }; - if !Self::should_visit(&children_set, basename) { - return Ok(false); - } - let has_dirstate_node_or_is_ignored = match pair { - Both(dirstate_node, fs_entry) => { - self.traverse_fs_and_dirstate( - fs_entry, - dirstate_node, - has_ignored_ancestor, - )?; - true - } - Left(dirstate_node) => { - self.traverse_dirstate_only(dirstate_node)?; - true - } - Right(fs_entry) => self.traverse_fs_only( - has_ignored_ancestor.force(&self.ignore_fn), - directory_hg_path, - fs_entry, - ), - }; - Ok(has_dirstate_node_or_is_ignored) - }) - .try_reduce(|| true, |a, b| Ok(a && b)) - .map(|res| res && readdir_succeeded) - } - - fn 
traverse_fs_and_dirstate<'ancestor>( - &self, - fs_entry: &DirEntry, - dirstate_node: NodeRef<'tree, 'on_disk>, - has_ignored_ancestor: &'ancestor HasIgnoredAncestor<'ancestor>, - ) -> Result<(), DirstateV2ParseError> { - let outdated_dircache = - self.check_for_outdated_directory_cache(&dirstate_node)?; - let hg_path = &dirstate_node.full_path_borrowed(self.dmap.on_disk)?; - let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink(); - if !file_or_symlink { - // If we previously had a file here, it was removed (with - // `hg rm` or similar) or deleted before it could be - // replaced by a directory or something else. - self.mark_removed_or_deleted_if_file(&dirstate_node)?; - } - if let Some(bad_type) = fs_entry.is_bad() { - if self.matcher.exact_match(hg_path) { - let path = dirstate_node.full_path(self.dmap.on_disk)?; - self.outcome.lock().unwrap().bad.push(( - path.to_owned().into(), - BadMatch::BadType(bad_type), - )) - } - } - if fs_entry.is_dir() { - if self.options.collect_traversed_dirs { - self.outcome - .lock() - .unwrap() - .traversed - .push(hg_path.detach_from_tree()) - } - let is_ignored = HasIgnoredAncestor::create( - Some(has_ignored_ancestor), - hg_path, - ); - let is_at_repo_root = false; - let children_all_have_dirstate_node_or_are_ignored = self - .traverse_fs_directory_and_dirstate( - &is_ignored, - dirstate_node.children(self.dmap.on_disk)?, - hg_path, - fs_entry, - dirstate_node.cached_directory_mtime()?, - is_at_repo_root, - )?; - self.maybe_save_directory_mtime( - children_all_have_dirstate_node_or_are_ignored, - fs_entry, - dirstate_node, - outdated_dircache, - )? - } else { - if file_or_symlink && self.matcher.matches(hg_path) { - if let Some(entry) = dirstate_node.entry()? 
{ - if !entry.any_tracked() { - // Forward-compat if we start tracking unknown/ignored - // files for caching reasons - self.mark_unknown_or_ignored( - has_ignored_ancestor.force(&self.ignore_fn), - hg_path, - ); - } - if entry.added() { - self.push_outcome(Outcome::Added, &dirstate_node)?; - } else if entry.removed() { - self.push_outcome(Outcome::Removed, &dirstate_node)?; - } else if entry.modified() { - self.push_outcome(Outcome::Modified, &dirstate_node)?; - } else { - self.handle_normal_file(&dirstate_node, fs_entry)?; - } - } else { - // `node.entry.is_none()` indicates a "directory" - // node, but the filesystem has a file - self.mark_unknown_or_ignored( - has_ignored_ancestor.force(&self.ignore_fn), - hg_path, - ); - } - } - - for child_node in dirstate_node.children(self.dmap.on_disk)?.iter() - { - self.traverse_dirstate_only(child_node)? - } - } - Ok(()) - } - - /// Save directory mtime if applicable. - /// - /// `outdated_directory_cache` is `true` if we've just invalidated the - /// cache for this directory in `check_for_outdated_directory_cache`, - /// which forces the update. - fn maybe_save_directory_mtime( - &self, - children_all_have_dirstate_node_or_are_ignored: bool, - directory_entry: &DirEntry, - dirstate_node: NodeRef<'tree, 'on_disk>, - outdated_directory_cache: bool, - ) -> Result<(), DirstateV2ParseError> { - if !children_all_have_dirstate_node_or_are_ignored { - return Ok(()); - } - // All filesystem directory entries from `read_dir` have a - // corresponding node in the dirstate, so we can reconstitute the - // names of those entries without calling `read_dir` again. 
- - // TODO: use let-else here and below when available: - // https://github.com/rust-lang/rust/issues/87335 - let status_start = if let Some(status_start) = - &self.filesystem_time_at_status_start - { - status_start - } else { - return Ok(()); - }; - - // Although the Rust standard library’s `SystemTime` type - // has nanosecond precision, the times reported for a - // directory’s (or file’s) modified time may have lower - // resolution based on the filesystem (for example ext3 - // only stores integer seconds), kernel (see - // https://stackoverflow.com/a/14393315/1162888), etc. - let metadata = match directory_entry.symlink_metadata() { - Ok(meta) => meta, - Err(_) => return Ok(()), - }; - - let directory_mtime = match TruncatedTimestamp::for_reliable_mtime_of( - &metadata, - status_start, - ) { - Ok(Some(directory_mtime)) => directory_mtime, - Ok(None) => { - // The directory was modified too recently, - // don’t cache its `read_dir` results. - // - // 1. A change to this directory (direct child was added or - // removed) cause its mtime to be set (possibly truncated) - // to `directory_mtime` - // 2. This `status` algorithm calls `read_dir` - // 3. An other change is made to the same directory is made so - // that calling `read_dir` agin would give different - // results, but soon enough after 1. that the mtime stays - // the same - // - // On a system where the time resolution poor, this - // scenario is not unlikely if all three steps are caused - // by the same script. - return Ok(()); - } - Err(_) => { - // OS/libc does not support mtime? - return Ok(()); - } - }; - // We’ve observed (through `status_start`) that time has - // “progressed” since `directory_mtime`, so any further - // change to this directory is extremely likely to cause a - // different mtime. - // - // Having the same mtime again is not entirely impossible - // since the system clock is not monotonous. 
It could jump - // backward to some point before `directory_mtime`, then a - // directory change could potentially happen during exactly - // the wrong tick. - // - // We deem this scenario (unlike the previous one) to be - // unlikely enough in practice. - - let is_up_to_date = if let Some(cached) = - dirstate_node.cached_directory_mtime()? - { - !outdated_directory_cache && cached.likely_equal(directory_mtime) - } else { - false - }; - if !is_up_to_date { - let hg_path = dirstate_node - .full_path_borrowed(self.dmap.on_disk)? - .detach_from_tree(); - self.new_cacheable_directories - .lock() - .unwrap() - .push((hg_path, directory_mtime)) - } - Ok(()) - } - - /// A file that is clean in the dirstate was found in the filesystem - fn handle_normal_file( - &self, - dirstate_node: &NodeRef<'tree, 'on_disk>, - fs_entry: &DirEntry, - ) -> Result<(), DirstateV2ParseError> { - // Keep the low 31 bits - fn truncate_u64(value: u64) -> i32 { - (value & 0x7FFF_FFFF) as i32 - } - - let fs_metadata = match fs_entry.symlink_metadata() { - Ok(meta) => meta, - Err(_) => return Ok(()), - }; - - let entry = dirstate_node - .entry()? - .expect("handle_normal_file called with entry-less node"); - let mode_changed = - || self.options.check_exec && entry.mode_changed(&fs_metadata); - let size = entry.size(); - let size_changed = size != truncate_u64(fs_metadata.len()); - if size >= 0 && size_changed && fs_metadata.file_type().is_symlink() { - // issue6456: Size returned may be longer due to encryption - // on EXT-4 fscrypt. TODO maybe only do it on EXT4? - self.push_outcome(Outcome::Unsure, dirstate_node)? - } else if dirstate_node.has_copy_source() - || entry.is_from_other_parent() - || (size >= 0 && (size_changed || mode_changed())) - { - self.push_outcome(Outcome::Modified, dirstate_node)? 
- } else { - let mtime_looks_clean = if let Some(dirstate_mtime) = - entry.truncated_mtime() - { - let fs_mtime = TruncatedTimestamp::for_mtime_of(&fs_metadata) - .expect("OS/libc does not support mtime?"); - // There might be a change in the future if for example the - // internal clock become off while process run, but this is a - // case where the issues the user would face - // would be a lot worse and there is nothing we - // can really do. - fs_mtime.likely_equal(dirstate_mtime) - } else { - // No mtime in the dirstate entry - false - }; - if !mtime_looks_clean { - self.push_outcome(Outcome::Unsure, dirstate_node)? - } else if self.options.list_clean { - self.push_outcome(Outcome::Clean, dirstate_node)? - } - } - Ok(()) - } - - /// A node in the dirstate tree has no corresponding filesystem entry - fn traverse_dirstate_only( - &self, - dirstate_node: NodeRef<'tree, 'on_disk>, - ) -> Result<(), DirstateV2ParseError> { - self.check_for_outdated_directory_cache(&dirstate_node)?; - self.mark_removed_or_deleted_if_file(&dirstate_node)?; - dirstate_node - .children(self.dmap.on_disk)? - .par_iter() - .map(|child_node| self.traverse_dirstate_only(child_node)) - .collect() - } - - /// A node in the dirstate tree has no corresponding *file* on the - /// filesystem - /// - /// Does nothing on a "directory" node - fn mark_removed_or_deleted_if_file( - &self, - dirstate_node: &NodeRef<'tree, 'on_disk>, - ) -> Result<(), DirstateV2ParseError> { - if let Some(entry) = dirstate_node.entry()? { - if !entry.any_tracked() { - // Future-compat for when we start storing ignored and unknown - // files for caching reasons - return Ok(()); - } - let path = dirstate_node.full_path(self.dmap.on_disk)?; - if self.matcher.matches(path) { - if entry.removed() { - self.push_outcome(Outcome::Removed, dirstate_node)? - } else { - self.push_outcome(Outcome::Deleted, dirstate_node)? 
- } - } - } - Ok(()) - } - - /// Something in the filesystem has no corresponding dirstate node - /// - /// Returns whether that path is ignored - fn traverse_fs_only( - &self, - has_ignored_ancestor: bool, - directory_hg_path: &HgPath, - fs_entry: &DirEntry, - ) -> bool { - let hg_path = directory_hg_path.join(&fs_entry.hg_path); - let file_or_symlink = fs_entry.is_file() || fs_entry.is_symlink(); - if fs_entry.is_dir() { - let is_ignored = - has_ignored_ancestor || (self.ignore_fn)(&hg_path); - let traverse_children = if is_ignored { - // Descendants of an ignored directory are all ignored - self.options.list_ignored - } else { - // Descendants of an unknown directory may be either unknown or - // ignored - self.options.list_unknown || self.options.list_ignored - }; - if traverse_children { - let is_at_repo_root = false; - if let Ok(children_fs_entries) = - self.read_dir(&hg_path, &fs_entry.fs_path, is_at_repo_root) - { - children_fs_entries.par_iter().for_each(|child_fs_entry| { - self.traverse_fs_only( - is_ignored, - &hg_path, - child_fs_entry, - ); - }) - } - if self.options.collect_traversed_dirs { - self.outcome.lock().unwrap().traversed.push(hg_path.into()) - } - } - is_ignored - } else if file_or_symlink { - if self.matcher.matches(&hg_path) { - self.mark_unknown_or_ignored( - has_ignored_ancestor, - &BorrowedPath::InMemory(&hg_path), - ) - } else { - // We haven’t computed whether this path is ignored. It - // might not be, and a future run of status might have a - // different matcher that matches it. So treat it as not - // ignored. That is, inhibit readdir caching of the parent - // directory. - false - } - } else { - // This is neither a directory, a plain file, or a symlink. - // Treat it like an ignored file. 
- true - } - } - - /// Returns whether that path is ignored - fn mark_unknown_or_ignored( - &self, - has_ignored_ancestor: bool, - hg_path: &BorrowedPath<'_, 'on_disk>, - ) -> bool { - let is_ignored = has_ignored_ancestor || (self.ignore_fn)(hg_path); - if is_ignored { - if self.options.list_ignored { - self.push_outcome_without_copy_source( - Outcome::Ignored, - hg_path, - ) - } - } else if self.options.list_unknown { - self.push_outcome_without_copy_source(Outcome::Unknown, hg_path) - } - is_ignored - } -} - -/// Since [`std::fs::FileType`] cannot be built directly, we emulate what we -/// care about. -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -enum FakeFileType { - File, - Directory, - Symlink, - BadType(BadType), -} - -impl From for FakeFileType { - fn from(f: std::fs::FileType) -> Self { - if f.is_dir() { - Self::Directory - } else if f.is_file() { - Self::File - } else if f.is_symlink() { - Self::Symlink - } else if f.is_fifo() { - Self::BadType(BadType::FIFO) - } else if f.is_block_device() { - Self::BadType(BadType::BlockDevice) - } else if f.is_char_device() { - Self::BadType(BadType::CharacterDevice) - } else if f.is_socket() { - Self::BadType(BadType::Socket) - } else { - Self::BadType(BadType::Unknown) - } - } -} - -struct DirEntry<'a> { - /// Path as stored in the dirstate, or just the filename for optimization. - hg_path: HgPathCow<'a>, - /// Filesystem path - fs_path: Cow<'a, Path>, - /// Lazily computed - symlink_metadata: Option, - /// Already computed for ergonomics. - file_type: FakeFileType, -} - -impl<'a> DirEntry<'a> { - /// Returns **unsorted** entries in the given directory, with name, - /// metadata and file type. - /// - /// If a `.hg` sub-directory is encountered: - /// - /// * At the repository root, ignore that sub-directory - /// * Elsewhere, we’re listing the content of a sub-repo. Return an empty - /// list instead. 
- fn read_dir(path: &Path, is_at_repo_root: bool) -> io::Result> { - // `read_dir` returns a "not found" error for the empty path - let at_cwd = path == Path::new(""); - let read_dir_path = if at_cwd { Path::new(".") } else { path }; - let mut results = Vec::new(); - for entry in read_dir_path.read_dir()? { - let entry = entry?; - let file_type = match entry.file_type() { - Ok(v) => v, - Err(e) => { - // race with file deletion? - if e.kind() == std::io::ErrorKind::NotFound { - continue; - } else { - return Err(e); - } - } - }; - let file_name = entry.file_name(); - // FIXME don't do this when cached - if file_name == ".hg" { - if is_at_repo_root { - // Skip the repo’s own .hg (might be a symlink) - continue; - } else if file_type.is_dir() { - // A .hg sub-directory at another location means a subrepo, - // skip it entirely. - return Ok(Vec::new()); - } - } - let full_path = if at_cwd { - file_name.clone().into() - } else { - entry.path() - }; - let filename = - Cow::Owned(get_bytes_from_os_string(file_name).into()); - let file_type = FakeFileType::from(file_type); - results.push(DirEntry { - hg_path: filename, - fs_path: Cow::Owned(full_path.to_path_buf()), - symlink_metadata: None, - file_type, - }) - } - Ok(results) - } - - fn symlink_metadata(&self) -> Result { - match &self.symlink_metadata { - Some(meta) => Ok(meta.clone()), - None => std::fs::symlink_metadata(&self.fs_path), - } - } - - fn is_dir(&self) -> bool { - self.file_type == FakeFileType::Directory - } - - fn is_file(&self) -> bool { - self.file_type == FakeFileType::File - } - - fn is_symlink(&self) -> bool { - self.file_type == FakeFileType::Symlink - } - - fn is_bad(&self) -> Option { - match self.file_type { - FakeFileType::BadType(ty) => Some(ty), - _ => None, - } - } -} diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/lib.rs --- a/rust/hg-core/src/lib.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-core/src/lib.rs Mon Nov 04 11:00:58 2024 +0100 @@ -11,7 +11,6 @@ pub mod sparse; pub use 
ancestors::{AncestorsIterator, MissingAncestors}; pub mod dirstate; -pub mod dirstate_tree; pub mod discovery; pub mod exit_codes; pub mod fncache; @@ -69,7 +68,7 @@ pub type FastHashMap = HashMap; // TODO: should this be the default `FastHashMap` for all of hg-core, not just -// dirstate_tree? How does XxHash compare with AHash, hashbrown’s default? +// dirstate? How does XxHash compare with AHash, hashbrown’s default? pub type FastHashbrownMap = hashbrown::HashMap; diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-core/src/repo.rs Mon Nov 04 11:00:58 2024 +0100 @@ -1,10 +1,8 @@ use crate::config::{Config, ConfigError, ConfigParseError}; +use crate::dirstate::dirstate_map::{DirstateIdentity, DirstateMapWriteMode}; +use crate::dirstate::on_disk::Docket as DirstateDocket; +use crate::dirstate::owning::OwningDirstateMap; use crate::dirstate::DirstateParents; -use crate::dirstate_tree::dirstate_map::{ - DirstateIdentity, DirstateMapWriteMode, -}; -use crate::dirstate_tree::on_disk::Docket as DirstateDocket; -use crate::dirstate_tree::owning::OwningDirstateMap; use crate::errors::HgResultExt; use crate::errors::{HgError, IoResultExt}; use crate::lock::{try_with_lock_no_wait, LockError}; @@ -321,8 +319,7 @@ let parents = if dirstate.is_empty() { DirstateParents::NULL } else if self.use_dirstate_v2() { - let docket_res = - crate::dirstate_tree::on_disk::read_docket(&dirstate); + let docket_res = crate::dirstate::on_disk::read_docket(&dirstate); match docket_res { Ok(docket) => docket.parents(), Err(_) => { @@ -361,8 +358,7 @@ if dirstate.is_empty() { Ok((identity, None, 0)) } else { - let docket_res = - crate::dirstate_tree::on_disk::read_docket(&dirstate); + let docket_res = crate::dirstate::on_disk::read_docket(&dirstate); match docket_res { Ok(docket) => { self.dirstate_parents.set(docket.parents()); @@ -464,9 +460,8 @@ if dirstate_file_contents.is_empty() { return 
Ok(OwningDirstateMap::new_empty(Vec::new(), identity)); } - let docket = crate::dirstate_tree::on_disk::read_docket( - &dirstate_file_contents, - )?; + let docket = + crate::dirstate::on_disk::read_docket(&dirstate_file_contents)?; debug_wait_for_file_or_print( self.config(), "dirstate.post-docket-read-file", diff -r 7ffc71552662 -r db065b33fa56 rust/hg-core/src/update.rs --- a/rust/hg-core/src/update.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-core/src/update.rs Mon Nov 04 11:00:58 2024 +0100 @@ -10,10 +10,8 @@ }; use crate::{ + dirstate::{dirstate_map::DirstateEntryReset, on_disk::write_tracked_key}, dirstate::{ParentFileData, TruncatedTimestamp}, - dirstate_tree::{ - dirstate_map::DirstateEntryReset, on_disk::write_tracked_key, - }, errors::{HgError, IoResultExt}, exit_codes, narrow, operations::{list_rev_tracked_files, ExpandedManifestEntry}, diff -r 7ffc71552662 -r db065b33fa56 rust/hg-cpython/src/dirstate.rs --- a/rust/hg-cpython/src/dirstate.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-cpython/src/dirstate.rs Mon Nov 04 11:00:58 2024 +0100 @@ -21,7 +21,7 @@ }; use cpython::{PyBytes, PyDict, PyList, PyModule, PyObject, PyResult, Python}; use dirstate_map::{DirstateIdentity, DirstateMap}; -use hg::dirstate_tree::on_disk::V2_FORMAT_MARKER; +use hg::dirstate::on_disk::V2_FORMAT_MARKER; /// Create the module, with `__package__` given from parent pub fn init_module(py: Python, package: &str) -> PyResult { diff -r 7ffc71552662 -r db065b33fa56 rust/hg-cpython/src/dirstate/copymap.rs --- a/rust/hg-cpython/src/dirstate/copymap.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-cpython/src/dirstate/copymap.rs Mon Nov 04 11:00:58 2024 +0100 @@ -15,8 +15,8 @@ use crate::dirstate::dirstate_map::v2_error; use crate::dirstate::dirstate_map::DirstateMap; +use hg::dirstate::on_disk::DirstateV2ParseError; use hg::dirstate::CopyMapIter; -use hg::dirstate_tree::on_disk::DirstateV2ParseError; use hg::utils::hg_path::HgPath; py_class!(pub class CopyMap |py| { diff -r 
7ffc71552662 -r db065b33fa56 rust/hg-cpython/src/dirstate/dirstate_map.rs --- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Mon Nov 04 11:00:58 2024 +0100 @@ -15,10 +15,10 @@ PyResult, Python, PythonObject, ToPyObject, UnsafePyLeaked, }; use hg::{ - dirstate::{ParentFileData, TruncatedTimestamp}, - dirstate_tree::dirstate_map::{ + dirstate::dirstate_map::{ DirstateEntryReset, DirstateIdentity as CoreDirstateIdentity, }, + dirstate::{ParentFileData, TruncatedTimestamp}, }; use crate::{ @@ -27,9 +27,9 @@ pybytes_deref::PyBytesDeref, }; use hg::{ - dirstate::StateMapIter, dirstate_tree::dirstate_map::DirstateMapWriteMode, - dirstate_tree::on_disk::DirstateV2ParseError, - dirstate_tree::owning::OwningDirstateMap, revlog::Node, + dirstate::dirstate_map::DirstateMapWriteMode, + dirstate::on_disk::DirstateV2ParseError, + dirstate::owning::OwningDirstateMap, dirstate::StateMapIter, revlog::Node, utils::files::normalize_case, utils::hg_path::HgPath, DirstateEntry, DirstateError, DirstateParents, }; diff -r 7ffc71552662 -r db065b33fa56 rust/rhg/src/error.rs --- a/rust/rhg/src/error.rs Mon Nov 04 10:38:17 2024 +0100 +++ b/rust/rhg/src/error.rs Mon Nov 04 11:00:58 2024 +0100 @@ -3,7 +3,7 @@ use crate::NoRepoInCwdError; use format_bytes::format_bytes; use hg::config::{ConfigError, ConfigParseError, ConfigValueParseError}; -use hg::dirstate_tree::on_disk::DirstateV2ParseError; +use hg::dirstate::on_disk::DirstateV2ParseError; use hg::errors::HgError; use hg::exit_codes; use hg::repo::RepoError;