changeset 47338:73f23e7610f8

dirstate-tree: Remove DirstateMap::iter_node_data_mut In an upcoming changeset we want DirstateMap to be able to work directly with nodes in their "on disk" representation, without always allocating corresponding in-memory data structures. Nodes would have two possible representations: one immutable "on disk" refering to the bytes buffer of the contents of the .hg/dirstate file, and one mutable with HashMap like the curren data structure. These nodes would have copy-on-write semantics: when an immutable node would need to be mutated, instead we allocate new mutable node for it and its ancestors. A mutable iterator of the entire tree would still be possible, but it would become much more expensive since we’d need to allocate mutable nodes for everything. Instead, remove this iterator. It was only used to clear ambiguous mtimes while serializing the `DirstateMap`. Instead clearing and serialization are now two separate passes. Clearing first uses an immutable iterator to collect the paths of nodes that need to be cleared, then accesses only those nodes mutably. Differential Revision: https://phab.mercurial-scm.org/D10744
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 19 May 2021 13:15:00 +0200
parents 717a94b423b9
children 0252600fd1cf
files rust/hg-core/src/dirstate/dirstate_map.rs rust/hg-core/src/dirstate/parsers.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs
diffstat 4 files changed, 71 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/dirstate_map.rs	Fri May 28 17:33:20 2021 -0400
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs	Wed May 19 13:15:00 2021 +0200
@@ -5,7 +5,6 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 
-use crate::dirstate::parsers::clear_ambiguous_mtime;
 use crate::dirstate::parsers::Timestamp;
 use crate::{
     dirstate::EntryState,
@@ -166,7 +165,7 @@
     ) {
         for filename in filenames {
             if let Some(entry) = self.state_map.get_mut(&filename) {
-                if clear_ambiguous_mtime(entry, now) {
+                if entry.clear_ambiguous_mtime(now) {
                     self.get_non_normal_other_parent_entries()
                         .0
                         .insert(filename.to_owned());
--- a/rust/hg-core/src/dirstate/parsers.rs	Fri May 28 17:33:20 2021 -0400
+++ b/rust/hg-core/src/dirstate/parsers.rs	Wed May 19 13:15:00 2021 +0200
@@ -126,25 +126,31 @@
 /// Seconds since the Unix epoch
 pub struct Timestamp(pub u64);
 
-pub fn clear_ambiguous_mtime(
-    entry: &mut DirstateEntry,
-    mtime_now: i32,
-) -> bool {
-    let ambiguous =
-        entry.state == EntryState::Normal && entry.mtime == mtime_now;
-    if ambiguous {
-        // The file was last modified "simultaneously" with the current
-        // write to dirstate (i.e. within the same second for file-
-        // systems with a granularity of 1 sec). This commonly happens
-        // for at least a couple of files on 'update'.
-        // The user could change the file without changing its size
-        // within the same second. Invalidate the file's mtime in
-        // dirstate, forcing future 'status' calls to compare the
-        // contents of the file if the size is the same. This prevents
-        // mistakenly treating such files as clean.
-        entry.mtime = -1;
+impl DirstateEntry {
+    pub fn mtime_is_ambiguous(&self, now: i32) -> bool {
+        self.state == EntryState::Normal && self.mtime == now
     }
-    ambiguous
+
+    pub fn clear_ambiguous_mtime(&mut self, now: i32) -> bool {
+        let ambiguous = self.mtime_is_ambiguous(now);
+        if ambiguous {
+            // The file was last modified "simultaneously" with the current
+            // write to dirstate (i.e. within the same second for file-
+            // systems with a granularity of 1 sec). This commonly happens
+            // for at least a couple of files on 'update'.
+            // The user could change the file without changing its size
+            // within the same second. Invalidate the file's mtime in
+            // dirstate, forcing future 'status' calls to compare the
+            // contents of the file if the size is the same. This prevents
+            // mistakenly treating such files as clean.
+            self.clear_mtime()
+        }
+        ambiguous
+    }
+
+    pub fn clear_mtime(&mut self) {
+        self.mtime = -1;
+    }
 }
 
 pub fn pack_dirstate(
@@ -170,7 +176,7 @@
     packed.extend(parents.p2.as_bytes());
 
     for (filename, entry) in state_map.iter_mut() {
-        clear_ambiguous_mtime(entry, now);
+        entry.clear_ambiguous_mtime(now);
         pack_entry(
             filename,
             entry,
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Fri May 28 17:33:20 2021 -0400
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Wed May 19 13:15:00 2021 +0200
@@ -6,7 +6,6 @@
 
 use super::on_disk;
 use super::path_with_basename::WithBasename;
-use crate::dirstate::parsers::clear_ambiguous_mtime;
 use crate::dirstate::parsers::pack_entry;
 use crate::dirstate::parsers::packed_entry_size;
 use crate::dirstate::parsers::parse_dirstate_entries;
@@ -79,13 +78,6 @@
     }
 }
 
-/// `(full_path, entry, copy_source)`
-type NodeDataMut<'tree, 'on_disk> = (
-    &'tree HgPath,
-    &'tree mut Option<DirstateEntry>,
-    &'tree mut Option<Cow<'on_disk, HgPath>>,
-);
-
 impl<'on_disk> DirstateMap<'on_disk> {
     pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self {
         Self {
@@ -252,7 +244,7 @@
 
     fn iter_nodes<'a>(
         &'a self,
-    ) -> impl Iterator<Item = (&'a HgPath, &'a Node)> + 'a {
+    ) -> impl Iterator<Item = (&'a Cow<'on_disk, HgPath>, &'a Node)> + 'a {
         // Depth first tree traversal.
         //
         // If we could afford internal iteration and recursion,
@@ -279,7 +271,7 @@
                 // Pseudo-recursion
                 let new_iter = child_node.children.iter();
                 let old_iter = std::mem::replace(&mut iter, new_iter);
-                let key = &**key.full_path();
+                let key = key.full_path();
                 stack.push((key, child_node, old_iter));
             }
             // Found the end of a `children.iter()` iterator.
@@ -296,42 +288,16 @@
         })
     }
 
-    /// Mutable iterator for the `(entry, copy source)` of each node.
-    ///
-    /// It would not be safe to yield mutable references to nodes themeselves
-    /// with `-> impl Iterator<Item = &mut Node>` since child nodes are
-    /// reachable from their ancestor nodes, potentially creating multiple
-    /// `&mut` references to a given node.
-    fn iter_node_data_mut<'tree>(
-        &'tree mut self,
-    ) -> impl Iterator<Item = NodeDataMut<'tree, 'on_disk>> + 'tree {
-        // Explict stack for pseudo-recursion, see `iter_nodes` above.
-        let mut stack = Vec::new();
-        let mut iter = self.root.iter_mut();
-        std::iter::from_fn(move || {
-            while let Some((key, child_node)) = iter.next() {
-                // Pseudo-recursion
-                let data = (
-                    &**key.full_path(),
-                    &mut child_node.entry,
-                    &mut child_node.copy_source,
-                );
-                let new_iter = child_node.children.iter_mut();
-                let old_iter = std::mem::replace(&mut iter, new_iter);
-                stack.push((data, old_iter));
+    fn clear_known_ambiguous_mtimes(&mut self, paths: &[impl AsRef<HgPath>]) {
+        for path in paths {
+            if let Some(node) =
+                Self::get_node_mut(&mut self.root, path.as_ref())
+            {
+                if let Some(entry) = node.entry.as_mut() {
+                    entry.clear_mtime();
+                }
             }
-            // Found the end of a `children.values_mut()` iterator.
-            if let Some((data, next_iter)) = stack.pop() {
-                // "Return" from pseudo-recursion by restoring state from the
-                // explicit stack
-                iter = next_iter;
-
-                Some(data)
-            } else {
-                // Reached the bottom of the stack, we’re done
-                None
-            }
-        })
+        }
     }
 }
 
@@ -427,7 +393,7 @@
         for filename in filenames {
             if let Some(node) = Self::get_node_mut(&mut self.root, &filename) {
                 if let Some(entry) = node.entry.as_mut() {
-                    clear_ambiguous_mtime(entry, now);
+                    entry.clear_ambiguous_mtime(now);
                 }
             }
         }
@@ -453,7 +419,7 @@
                 .filter(|entry| {
                     entry.is_non_normal() || entry.is_from_other_parent()
                 })
-                .map(|_| path)
+                .map(|_| &**path)
         }))
     }
 
@@ -475,7 +441,7 @@
             node.entry
                 .as_ref()
                 .filter(|entry| entry.is_non_normal())
-                .map(|_| path)
+                .map(|_| &**path)
         }))
     }
 
@@ -486,7 +452,7 @@
             node.entry
                 .as_ref()
                 .filter(|entry| entry.is_from_other_parent())
-                .map(|_| path)
+                .map(|_| &**path)
         }))
     }
 
@@ -522,29 +488,33 @@
         parents: DirstateParents,
         now: Timestamp,
     ) -> Result<Vec<u8>, DirstateError> {
+        let now: i32 = now.0.try_into().expect("time overflow");
+        let mut ambiguous_mtimes = Vec::new();
         // Optizimation (to be measured?): pre-compute size to avoid `Vec`
         // reallocations
         let mut size = parents.as_bytes().len();
         for (path, node) in self.iter_nodes() {
-            if node.entry.is_some() {
+            if let Some(entry) = &node.entry {
                 size += packed_entry_size(
                     path,
                     node.copy_source.as_ref().map(|p| &**p),
-                )
+                );
+                if entry.mtime_is_ambiguous(now) {
+                    ambiguous_mtimes.push(path.clone())
+                }
             }
         }
+        self.clear_known_ambiguous_mtimes(&ambiguous_mtimes);
 
         let mut packed = Vec::with_capacity(size);
         packed.extend(parents.as_bytes());
 
-        let now: i32 = now.0.try_into().expect("time overflow");
-        for (path, opt_entry, copy_source) in self.iter_node_data_mut() {
-            if let Some(entry) = opt_entry {
-                clear_ambiguous_mtime(entry, now);
+        for (path, node) in self.iter_nodes() {
+            if let Some(entry) = &node.entry {
                 pack_entry(
                     path,
                     entry,
-                    copy_source.as_ref().map(|p| &**p),
+                    node.copy_source.as_ref().map(|p| &**p),
                     &mut packed,
                 );
             }
@@ -558,7 +528,21 @@
         parents: DirstateParents,
         now: Timestamp,
     ) -> Result<Vec<u8>, DirstateError> {
-        on_disk::write(self, parents, now)
+        // TODO: how do we want to handle this in 2038?
+        let now: i32 = now.0.try_into().expect("time overflow");
+        let mut paths = Vec::new();
+        for (path, node) in self.iter_nodes() {
+            if let Some(entry) = &node.entry {
+                if entry.mtime_is_ambiguous(now) {
+                    paths.push(path.clone())
+                }
+            }
+        }
+        // Borrow of `self` ends here since we collect cloned paths
+
+        self.clear_known_ambiguous_mtimes(&paths);
+
+        on_disk::write(self, parents)
     }
 
     fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
@@ -592,7 +576,7 @@
         Box::new(self.iter_nodes().filter_map(|(path, node)| {
             node.copy_source
                 .as_ref()
-                .map(|copy_source| (path, &**copy_source))
+                .map(|copy_source| (&**path, &**copy_source))
         }))
     }
 
@@ -649,7 +633,7 @@
 
     fn iter(&self) -> StateMapIter<'_> {
         Box::new(self.iter_nodes().filter_map(|(path, node)| {
-            node.entry.as_ref().map(|entry| (path, entry))
+            node.entry.as_ref().map(|entry| (&**path, entry))
         }))
     }
 }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Fri May 28 17:33:20 2021 -0400
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Wed May 19 13:15:00 2021 +0200
@@ -9,8 +9,6 @@
 //! Nodes in turn contain slices to variable-size paths, and to their own child
 //! nodes (if any) for nested files and directories.
 
-use crate::dirstate::parsers::clear_ambiguous_mtime;
-use crate::dirstate::parsers::Timestamp;
 use crate::dirstate_tree::dirstate_map::{self, DirstateMap};
 use crate::dirstate_tree::path_with_basename::WithBasename;
 use crate::errors::HgError;
@@ -230,11 +228,7 @@
 pub(super) fn write(
     dirstate_map: &mut DirstateMap,
     parents: DirstateParents,
-    now: Timestamp,
 ) -> Result<Vec<u8>, DirstateError> {
-    // TODO: how do we want to handle this in 2038?
-    let now: i32 = now.0.try_into().expect("time overflow");
-
     let header_len = std::mem::size_of::<Header>();
 
     // This ignores the space for paths, and for nodes without an entry.
@@ -248,7 +242,7 @@
     // actual offset for the root nodes.
     out.resize(header_len, 0_u8);
 
-    let root = write_nodes(&mut dirstate_map.root, now, &mut out)?;
+    let root = write_nodes(&mut dirstate_map.root, &mut out)?;
 
     let header = Header {
         marker: *V2_FORMAT_MARKER,
@@ -263,10 +257,8 @@
     Ok(out)
 }
 
-/// Serialize the dirstate to the `v2` format after clearing ambigous `mtime`s.
 fn write_nodes(
     nodes: &mut dirstate_map::ChildNodes,
-    now: i32,
     out: &mut Vec<u8>,
 ) -> Result<ChildNodes, DirstateError> {
     // `dirstate_map::ChildNodes` is a `HashMap` with undefined iteration
@@ -277,7 +269,7 @@
     let mut on_disk_nodes = Vec::with_capacity(nodes.len());
     for (full_path, node) in nodes {
         on_disk_nodes.push(Node {
-            children: write_nodes(&mut node.children, now, out)?,
+            children: write_nodes(&mut node.children, out)?,
             tracked_descendants_count: node.tracked_descendants_count.into(),
             full_path: write_slice::<u8>(
                 full_path.full_path().as_bytes(),
@@ -296,7 +288,6 @@
                 }
             },
             entry: if let Some(entry) = &mut node.entry {
-                clear_ambiguous_mtime(entry, now);
                 OptEntry {
                     state: entry.state.into(),
                     mode: entry.mode.into(),