Mercurial > hg
changeset 47330:73f23e7610f8
dirstate-tree: Remove DirstateMap::iter_node_data_mut
In an upcoming changeset we want DirstateMap to be able to work directly
with nodes in their "on disk" representation, without always allocating
corresponding in-memory data structures. Nodes would have two possible
representations: one immutable "on disk" refering to the bytes buffer
of the contents of the .hg/dirstate file, and one mutable with HashMap
like the curren data structure.
These nodes would have copy-on-write semantics: when an immutable node
would need to be mutated, instead we allocate new mutable node for it and
its ancestors.
A mutable iterator of the entire tree would still be possible, but it would
become much more expensive since we’d need to allocate mutable nodes for
everything.
Instead, remove this iterator. It was only used to clear ambiguous mtimes
while serializing the `DirstateMap`. Instead clearing and serialization are
now two separate passes. Clearing first uses an immutable iterator to collect
the paths of nodes that need to be cleared, then accesses only those nodes
mutably.
Differential Revision: https://phab.mercurial-scm.org/D10744
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 19 May 2021 13:15:00 +0200 |
parents | 717a94b423b9 |
children | 0252600fd1cf |
files | rust/hg-core/src/dirstate/dirstate_map.rs rust/hg-core/src/dirstate/parsers.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs |
diffstat | 4 files changed, 71 insertions(+), 91 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/dirstate_map.rs Fri May 28 17:33:20 2021 -0400 +++ b/rust/hg-core/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -5,7 +5,6 @@ // This software may be used and distributed according to the terms of the // GNU General Public License version 2 or any later version. -use crate::dirstate::parsers::clear_ambiguous_mtime; use crate::dirstate::parsers::Timestamp; use crate::{ dirstate::EntryState, @@ -166,7 +165,7 @@ ) { for filename in filenames { if let Some(entry) = self.state_map.get_mut(&filename) { - if clear_ambiguous_mtime(entry, now) { + if entry.clear_ambiguous_mtime(now) { self.get_non_normal_other_parent_entries() .0 .insert(filename.to_owned());
--- a/rust/hg-core/src/dirstate/parsers.rs Fri May 28 17:33:20 2021 -0400 +++ b/rust/hg-core/src/dirstate/parsers.rs Wed May 19 13:15:00 2021 +0200 @@ -126,25 +126,31 @@ /// Seconds since the Unix epoch pub struct Timestamp(pub u64); -pub fn clear_ambiguous_mtime( - entry: &mut DirstateEntry, - mtime_now: i32, -) -> bool { - let ambiguous = - entry.state == EntryState::Normal && entry.mtime == mtime_now; - if ambiguous { - // The file was last modified "simultaneously" with the current - // write to dirstate (i.e. within the same second for file- - // systems with a granularity of 1 sec). This commonly happens - // for at least a couple of files on 'update'. - // The user could change the file without changing its size - // within the same second. Invalidate the file's mtime in - // dirstate, forcing future 'status' calls to compare the - // contents of the file if the size is the same. This prevents - // mistakenly treating such files as clean. - entry.mtime = -1; +impl DirstateEntry { + pub fn mtime_is_ambiguous(&self, now: i32) -> bool { + self.state == EntryState::Normal && self.mtime == now } - ambiguous + + pub fn clear_ambiguous_mtime(&mut self, now: i32) -> bool { + let ambiguous = self.mtime_is_ambiguous(now); + if ambiguous { + // The file was last modified "simultaneously" with the current + // write to dirstate (i.e. within the same second for file- + // systems with a granularity of 1 sec). This commonly happens + // for at least a couple of files on 'update'. + // The user could change the file without changing its size + // within the same second. Invalidate the file's mtime in + // dirstate, forcing future 'status' calls to compare the + // contents of the file if the size is the same. This prevents + // mistakenly treating such files as clean. + self.clear_mtime() + } + ambiguous + } + + pub fn clear_mtime(&mut self) { + self.mtime = -1; + } } pub fn pack_dirstate( @@ -170,7 +176,7 @@ packed.extend(parents.p2.as_bytes()); for (filename, entry) in state_map.iter_mut() { - clear_ambiguous_mtime(entry, now); + entry.clear_ambiguous_mtime(now); pack_entry( filename, entry,
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri May 28 17:33:20 2021 -0400 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -6,7 +6,6 @@ use super::on_disk; use super::path_with_basename::WithBasename; -use crate::dirstate::parsers::clear_ambiguous_mtime; use crate::dirstate::parsers::pack_entry; use crate::dirstate::parsers::packed_entry_size; use crate::dirstate::parsers::parse_dirstate_entries; @@ -79,13 +78,6 @@ } } -/// `(full_path, entry, copy_source)` -type NodeDataMut<'tree, 'on_disk> = ( - &'tree HgPath, - &'tree mut Option<DirstateEntry>, - &'tree mut Option<Cow<'on_disk, HgPath>>, -); - impl<'on_disk> DirstateMap<'on_disk> { pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { Self { @@ -252,7 +244,7 @@ fn iter_nodes<'a>( &'a self, - ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a { + ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a { // Depth first tree traversal. // // If we could afford internal iteration and recursion, @@ -279,7 +271,7 @@ // Pseudo-recursion let new_iter = child_node.children.iter(); let old_iter = std::mem::replace(&mut iter, new_iter); - let key = &**key.full_path(); + let key = key.full_path(); stack.push((key, child_node, old_iter)); } // Found the end of a `children.iter()` iterator. @@ -296,42 +288,16 @@ }) } - /// Mutable iterator for the `(entry, copy source)` of each node. - /// - /// It would not be safe to yield mutable references to nodes themeselves - /// with `-> impl Iterator<Item = &mut Node>` since child nodes are - /// reachable from their ancestor nodes, potentially creating multiple - /// `&mut` references to a given node. - fn iter_node_data_mut<'tree>( - &'tree mut self, - ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree { - // Explict stack for pseudo-recursion, see `iter_nodes` above. - let mut stack = Vec::new(); - let mut iter = self.root.iter_mut(); - std::iter::from_fn(move || { - while let Some((key, child_node)) = iter.next() { - // Pseudo-recursion - let data = ( - &**key.full_path(), - &mut child_node.entry, - &mut child_node.copy_source, - ); - let new_iter = child_node.children.iter_mut(); - let old_iter = std::mem::replace(&mut iter, new_iter); - stack.push((data, old_iter)); + fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) { + for path in paths { + if let Some(node) = + Self::get_node_mut(&mut self.root, path.as_ref()) + { + if let Some(entry) = node.entry.as_mut() { + entry.clear_mtime(); + } } - // Found the end of a `children.values_mut()` iterator. - if let Some((data, next_iter)) = stack.pop() { - // "Return" from pseudo-recursion by restoring state from the - // explicit stack - iter = next_iter; - - Some(data) - } else { - // Reached the bottom of the stack, we’re done - None - } - }) + } } } @@ -427,7 +393,7 @@ for filename in filenames { if let Some(node) = Self::get_node_mut(&mut self.root, &filename) { if let Some(entry) = node.entry.as_mut() { - clear_ambiguous_mtime(entry, now); + entry.clear_ambiguous_mtime(now); } } } @@ -453,7 +419,7 @@ .filter(|entry| { entry.is_non_normal() || entry.is_from_other_parent() }) - .map(|_| path) + .map(|_| &**path) })) } @@ -475,7 +441,7 @@ node.entry .as_ref() .filter(|entry| entry.is_non_normal()) - .map(|_| path) + .map(|_| &**path) })) } @@ -486,7 +452,7 @@ node.entry .as_ref() .filter(|entry| entry.is_from_other_parent()) - .map(|_| path) + .map(|_| &**path) })) } @@ -522,29 +488,33 @@ parents: DirstateParents, now: Timestamp, ) -> Result<Vec<u8>, DirstateError> { + let now: i32 = now.0.try_into().expect("time overflow"); + let mut ambiguous_mtimes = Vec::new(); // Optizimation (to be measured?): pre-compute size to avoid `Vec` // reallocations let mut size = parents.as_bytes().len(); for (path, node) in self.iter_nodes() { - if node.entry.is_some() { + if let Some(entry) = &node.entry { size += packed_entry_size( path, node.copy_source.as_ref().map(|p| &**p), - ) + ); + if entry.mtime_is_ambiguous(now) { + ambiguous_mtimes.push(path.clone()) + } } } + self.clear_known_ambiguous_mtimes(&ambiguous_mtimes); let mut packed = Vec::with_capacity(size); packed.extend(parents.as_bytes()); - let now: i32 = now.0.try_into().expect("time overflow"); - for (path, opt_entry, copy_source) in self.iter_node_data_mut() { - if let Some(entry) = opt_entry { - clear_ambiguous_mtime(entry, now); + for (path, node) in self.iter_nodes() { + if let Some(entry) = &node.entry { pack_entry( path, entry, - copy_source.as_ref().map(|p| &**p), + node.copy_source.as_ref().map(|p| &**p), &mut packed, ); } @@ -558,7 +528,21 @@ parents: DirstateParents, now: Timestamp, ) -> Result<Vec<u8>, DirstateError> { - on_disk::write(self, parents, now) + // TODO: how do we want to handle this in 2038? + let now: i32 = now.0.try_into().expect("time overflow"); + let mut paths = Vec::new(); + for (path, node) in self.iter_nodes() { + if let Some(entry) = &node.entry { + if entry.mtime_is_ambiguous(now) { + paths.push(path.clone()) + } + } + } + // Borrow of `self` ends here since we collect cloned paths + + self.clear_known_ambiguous_mtimes(&paths); + + on_disk::write(self, parents) } fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { @@ -592,7 +576,7 @@ Box::new(self.iter_nodes().filter_map(|(path, node)| { node.copy_source .as_ref() - .map(|copy_source| (path, &**copy_source)) + .map(|copy_source| (&**path, &**copy_source)) })) } @@ -649,7 +633,7 @@ fn iter(&self) -> StateMapIter<'_> { Box::new(self.iter_nodes().filter_map(|(path, node)| { - node.entry.as_ref().map(|entry| (path, entry)) + node.entry.as_ref().map(|entry| (&**path, entry)) })) } }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Fri May 28 17:33:20 2021 -0400 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Wed May 19 13:15:00 2021 +0200 @@ -9,8 +9,6 @@ //! Nodes in turn contain slices to variable-size paths, and to their own child //! nodes (if any) for nested files and directories. -use crate::dirstate::parsers::clear_ambiguous_mtime; -use crate::dirstate::parsers::Timestamp; use crate::dirstate_tree::dirstate_map::{self, DirstateMap}; use crate::dirstate_tree::path_with_basename::WithBasename; use crate::errors::HgError; @@ -230,11 +228,7 @@ pub(super) fn write( dirstate_map: &mut DirstateMap, parents: DirstateParents, - now: Timestamp, ) -> Result<Vec<u8>, DirstateError> { - // TODO: how do we want to handle this in 2038? - let now: i32 = now.0.try_into().expect("time overflow"); - let header_len = std::mem::size_of::<Header>(); // This ignores the space for paths, and for nodes without an entry. @@ -248,7 +242,7 @@ // actual offset for the root nodes. out.resize(header_len, 0_u8); - let root = write_nodes(&mut dirstate_map.root, now, &mut out)?; + let root = write_nodes(&mut dirstate_map.root, &mut out)?; let header = Header { marker: *V2_FORMAT_MARKER, @@ -263,10 +257,8 @@ Ok(out) } -/// Serialize the dirstate to the `v2` format after clearing ambigous `mtime`s. fn write_nodes( nodes: &mut dirstate_map::ChildNodes, - now: i32, out: &mut Vec<u8>, ) -> Result<ChildNodes, DirstateError> { // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration @@ -277,7 +269,7 @@ let mut on_disk_nodes = Vec::with_capacity(nodes.len()); for (full_path, node) in nodes { on_disk_nodes.push(Node { - children: write_nodes(&mut node.children, now, out)?, + children: write_nodes(&mut node.children, out)?, tracked_descendants_count: node.tracked_descendants_count.into(), full_path: write_slice::<u8>( full_path.full_path().as_bytes(), @@ -296,7 +288,6 @@ } }, entry: if let Some(entry) = &mut node.entry { - clear_ambiguous_mtime(entry, now); OptEntry { state: entry.state.into(), mode: entry.mode.into(),