rust-dirstate-v2: save proper data size if no new data on append stable
authorRaphaël Gomès <rgomes@octobus.net>
Thu, 21 Apr 2022 14:47:13 +0200
branchstable
changeset 49145 dd2503a63d33
parent 49068 90e564882f07
child 49146 802e2c25dab8
rust-dirstate-v2: save proper data size if no new data on append This is currently only triggered with the tests ran with `--rhg` without `--rust`, by "luck", there probably always was something to write, like an mtime when also using Rust extensions alongside `rhg`. Differential Revision: https://phab.mercurial-scm.org/D12580
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-core/src/repo.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Wed Apr 13 14:46:22 2022 -0400
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Thu Apr 21 14:47:13 2022 +0200
@@ -49,6 +49,10 @@
 
     /// How many bytes of `on_disk` are not used anymore
     pub(super) unreachable_bytes: u32,
+
+    /// Size of the data used to first load this `DirstateMap`. Used in case
+    /// we need to write some new metadata, but no new data on disk.
+    pub(super) old_data_size: usize,
 }
 
 /// Using a plain `HgPathBuf` of the full path from the repository root as a
@@ -429,6 +433,7 @@
             nodes_with_copy_source_count: 0,
             ignore_patterns_hash: [0; on_disk::IGNORE_PATTERNS_HASH_LEN],
             unreachable_bytes: 0,
+            old_data_size: 0,
         }
     }
 
@@ -989,12 +994,13 @@
     /// Returns new data and metadata together with whether that data should be
     /// appended to the existing data file whose content is at
     /// `map.on_disk` (true), instead of written to a new data file
-    /// (false).
+    /// (false), and the previous size of data on disk.
     #[timed]
     pub fn pack_v2(
         &self,
         can_append: bool,
-    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool), DirstateError> {
+    ) -> Result<(Vec<u8>, on_disk::TreeMetadata, bool, usize), DirstateError>
+    {
         let map = self.get_map();
         on_disk::write(map, can_append)
     }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Wed Apr 13 14:46:22 2022 -0400
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu Apr 21 14:47:13 2022 +0200
@@ -290,6 +290,7 @@
         nodes_with_copy_source_count: meta.nodes_with_copy_source_count.get(),
         ignore_patterns_hash: meta.ignore_patterns_hash,
         unreachable_bytes: meta.unreachable_bytes.get(),
+        old_data_size: on_disk.len(),
     };
     Ok(dirstate_map)
 }
@@ -601,11 +602,11 @@
 /// Returns new data and metadata, together with whether that data should be
 /// appended to the existing data file whose content is at
 /// `dirstate_map.on_disk` (true), instead of written to a new data file
-/// (false).
+/// (false), and the previous size of data on disk.
 pub(super) fn write(
     dirstate_map: &DirstateMap,
     can_append: bool,
-) -> Result<(Vec<u8>, TreeMetadata, bool), DirstateError> {
+) -> Result<(Vec<u8>, TreeMetadata, bool, usize), DirstateError> {
     let append = can_append && dirstate_map.write_should_append();
 
     // This ignores the space for paths, and for nodes without an entry.
@@ -631,7 +632,7 @@
         unused: [0; 4],
         ignore_patterns_hash: dirstate_map.ignore_patterns_hash,
     };
-    Ok((writer.out, meta, append))
+    Ok((writer.out, meta, append, dirstate_map.old_data_size))
 }
 
 struct Writer<'dmap, 'on_disk> {
--- a/rust/hg-core/src/repo.rs	Wed Apr 13 14:46:22 2022 -0400
+++ b/rust/hg-core/src/repo.rs	Thu Apr 21 14:47:13 2022 +0200
@@ -438,7 +438,8 @@
             let uuid = self.dirstate_data_file_uuid.get_or_init(self)?;
             let mut uuid = uuid.as_ref();
             let can_append = uuid.is_some();
-            let (data, tree_metadata, append) = map.pack_v2(can_append)?;
+            let (data, tree_metadata, append, old_data_size) =
+                map.pack_v2(can_append)?;
             if !append {
                 uuid = None
             }
@@ -464,10 +465,19 @@
                 // returns `ErrorKind::AlreadyExists`? Collision chance of two
                 // random IDs is one in 2**32
                 let mut file = options.open(&data_filename)?;
-                file.write_all(&data)?;
-                file.flush()?;
-                // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
-                file.seek(SeekFrom::Current(0))
+                if data.is_empty() {
+                    // If we're not appending anything, the data size is the
+                    // same as in the previous docket. It is *not* the file
+                    // length, since it could have garbage at the end.
+                    // We don't have to worry about it when we do have data
+                    // to append since we rewrite the root node in this case.
+                    Ok(old_data_size as u64)
+                } else {
+                    file.write_all(&data)?;
+                    file.flush()?;
+                    // TODO: use https://doc.rust-lang.org/std/io/trait.Seek.html#method.stream_position when we require Rust 1.51+
+                    file.seek(SeekFrom::Current(0))
+                }
             })()
             .when_writing_file(&data_filename)?;
             DirstateDocket::serialize(
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Wed Apr 13 14:46:22 2022 -0400
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Thu Apr 21 14:47:13 2022 +0200
@@ -211,7 +211,7 @@
         let inner = self.inner(py).borrow();
         let result = inner.pack_v2(can_append);
         match result {
-            Ok((packed, tree_metadata, append)) => {
+            Ok((packed, tree_metadata, append, _old_data_size)) => {
                 let packed = PyBytes::new(py, &packed);
                 let tree_metadata = PyBytes::new(py, tree_metadata.as_bytes());
                 let tuple = (packed, tree_metadata, append);