diff rust/hg-core/src/dirstate_tree/on_disk.rs @ 51616:9dbbaecfc950 stable

dirstate-v2: add check of parent/child nodes being related when writing. This stems from a corruption seen in a private repository. We're not sure of the source of the corruption, and it's very possible that we're seeing compounded effects of multiple writes on a corrupted dirstate. Adding this check is not expensive in itself and large writes of the dirstate are not common. This change does not catch this problem at the root node, the next one will.
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 06 May 2024 13:02:52 +0200
parents d58e754f2db0
children f808fa119212
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Mon May 06 16:29:00 2024 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Mon May 06 13:02:52 2024 +0200
@@ -691,9 +691,11 @@
         // First accumulate serialized nodes in a `Vec`
         let mut on_disk_nodes = Vec::with_capacity(nodes_len);
         for node in nodes {
-            let children =
-                self.write_nodes(node.children(self.dirstate_map.on_disk)?)?;
+            let children = node.children(self.dirstate_map.on_disk)?;
             let full_path = node.full_path(self.dirstate_map.on_disk)?;
+            self.check_children(&children, full_path)?;
+
+            let children = self.write_nodes(children)?;
             let full_path = self.write_path(full_path.as_bytes());
             let copy_source = if let Some(source) =
                 node.copy_source(self.dirstate_map.on_disk)?
@@ -771,6 +773,37 @@
         Ok(ChildNodes { start, len })
     }
 
+    /// Catch some dirstate corruptions before writing them to disk
+    ///
+    /// For every node in `children`, the part of its full path that
+    /// precedes its base name and the separating slash must be exactly
+    /// `full_path`, the path of the parent node.
+    ///
+    /// # Errors
+    ///
+    /// Returns a corruption error for the first child whose path prefix
+    /// differs from `full_path`, including a child whose full path is too
+    /// short to hold any parent prefix at all. Errors from reading a
+    /// child's path or base name are propagated as-is.
+    fn check_children(
+        &mut self,
+        children: &dirstate_map::ChildNodesRef,
+        full_path: &HgPath,
+    ) -> Result<(), DirstateError> {
+        for child in children.iter() {
+            let child_full_path =
+                child.full_path(self.dirstate_map.on_disk)?;
+            let base_name_len =
+                child.base_name(self.dirstate_map.on_disk)?.len();
+
+            // Remove the filename and the slash. A corrupted child may
+            // have a full path with no room for either, so use checked
+            // arithmetic and checked slicing: the point of this function
+            // is to report such nodes, not to panic on them.
+            let child_prefix = child_full_path
+                .len()
+                .checked_sub(base_name_len + 1)
+                .and_then(|len| child_full_path.as_bytes().get(..len));
+
+            if child_prefix != Some(full_path.as_bytes()) {
+                let explanation = format!(
+                    "dirstate child node's path '{}' \
+                        does not start with its parent's path '{}'",
+                    child_full_path, full_path,
+                );
+
+                return Err(HgError::corrupted(explanation).into());
+            }
+        }
+        Ok(())
+    }
+
     /// If the given slice of items is within `on_disk`, returns its offset
     /// from the start of `on_disk`.
     fn on_disk_offset_of<T>(&self, slice: &[T]) -> Option<Offset>