rust-dirstate-v2: don't write dirstate if data file has changed stable
authorRaphaël Gomès <rgomes@octobus.net>
Tue, 28 Feb 2023 17:58:15 +0100
branchstable
changeset 49702 07d030b38097
parent 49701 6cce0afc1454
child 49703 dbe09fb038fc
rust-dirstate-v2: don't write dirstate if data file has changed This fixes the following race: - process A reads the dirstate - process B reads and writes the dirstate - process A writes the dirstate This either resulted in losing what process B had just written or a crash because the `uuid` had changed and we were trying to write to a file that doesn't exist. More explanations inside. This doesn't fix the issue for dirstate-v1, a later patch addresses it.
rust/hg-core/src/repo.rs
tests/test-dirstate-read-race.t
tests/test-dirstate-status-write-race.t
--- a/rust/hg-core/src/repo.rs	Mon Dec 12 17:08:12 2022 +0100
+++ b/rust/hg-core/src/repo.rs	Tue Feb 28 17:58:15 2023 +0100
@@ -34,7 +34,6 @@
     requirements: HashSet<String>,
     config: Config,
     dirstate_parents: LazyCell<DirstateParents>,
-    dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
     dirstate_map: LazyCell<OwningDirstateMap>,
     changelog: LazyCell<Changelog>,
     manifestlog: LazyCell<Manifestlog>,
@@ -187,7 +186,6 @@
             dot_hg,
             config: repo_config,
             dirstate_parents: LazyCell::new(),
-            dirstate_data_file_uuid: LazyCell::new(),
             dirstate_map: LazyCell::new(),
             changelog: LazyCell::new(),
             manifestlog: LazyCell::new(),
@@ -270,15 +268,10 @@
     fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
         let dirstate = self.dirstate_file_contents()?;
         let parents = if dirstate.is_empty() {
-            if self.has_dirstate_v2() {
-                self.dirstate_data_file_uuid.set(None);
-            }
             DirstateParents::NULL
         } else if self.has_dirstate_v2() {
             let docket =
                 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
-            self.dirstate_data_file_uuid
-                .set(Some(docket.uuid.to_owned()));
             docket.parents()
         } else {
             crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
@@ -288,9 +281,13 @@
         Ok(parents)
     }
 
-    fn read_dirstate_data_file_uuid(
+    /// Returns the information read from the dirstate docket necessary to
+    /// check if the data file has been updated/deleted by another process
+    /// since we last read the dirstate.
+    /// Namely, the data file uuid and the data size.
+    fn get_dirstate_data_file_integrity(
         &self,
-    ) -> Result<Option<Vec<u8>>, HgError> {
+    ) -> Result<(Option<Vec<u8>>, usize), HgError> {
         assert!(
             self.has_dirstate_v2(),
             "accessing dirstate data file ID without dirstate-v2"
@@ -298,12 +295,12 @@
         let dirstate = self.dirstate_file_contents()?;
         if dirstate.is_empty() {
             self.dirstate_parents.set(DirstateParents::NULL);
-            Ok(None)
+            Ok((None, 0))
         } else {
             let docket =
                 crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
             self.dirstate_parents.set(docket.parents());
-            Ok(Some(docket.uuid.to_owned()))
+            Ok((Some(docket.uuid.to_owned()), docket.data_size()))
         }
     }
 
@@ -370,7 +367,6 @@
         let dirstate_file_contents = self.dirstate_file_contents()?;
         if dirstate_file_contents.is_empty() {
             self.dirstate_parents.set(DirstateParents::NULL);
-            self.dirstate_data_file_uuid.set(None);
             return Ok(OwningDirstateMap::new_empty(Vec::new()));
         }
         let docket = crate::dirstate_tree::on_disk::read_docket(
@@ -381,8 +377,6 @@
             "dirstate.post-docket-read-file",
         );
         self.dirstate_parents.set(docket.parents());
-        self.dirstate_data_file_uuid
-            .set(Some(docket.uuid.to_owned()));
         let uuid = docket.uuid.to_owned();
         let data_size = docket.data_size();
 
@@ -540,10 +534,27 @@
         // it’s unset
         let parents = self.dirstate_parents()?;
         let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
-            let uuid_opt = self
-                .dirstate_data_file_uuid
-                .get_or_init(|| self.read_dirstate_data_file_uuid())?;
-            let uuid_opt = uuid_opt.as_ref();
+            let (uuid, data_size) = self.get_dirstate_data_file_integrity()?;
+            let uuid_changed = uuid.as_deref() != map.old_uuid();
+            let data_length_changed = data_size != map.old_data_size();
+
+            if uuid_changed || data_length_changed {
+                // If uuid or length changed since last disk read, don't write.
+                // This is fine because either we're in a command that doesn't
+                // write anything too important (like `hg status`), or we're in
+                // `hg add` and we're supposed to have taken the lock before
+                // reading anyway.
+                //
+                // TODO complain loudly if we've changed anything important
+                // without taking the lock.
+                // (see `hg help config.format.use-dirstate-tracked-hint`)
+                log::debug!(
+                    "dirstate has changed since last read, not updating."
+                );
+                return Ok(());
+            }
+
+            let uuid_opt = map.old_uuid();
             let write_mode = if uuid_opt.is_some() {
                 DirstateMapWriteMode::Auto
             } else {
--- a/tests/test-dirstate-read-race.t	Mon Dec 12 17:08:12 2022 +0100
+++ b/tests/test-dirstate-read-race.t	Tue Feb 28 17:58:15 2023 +0100
@@ -312,25 +312,6 @@
   0 files updated, 0 files merged, 6 files removed, 0 files unresolved
   $ touch $TESTTMP/status-race-lock
   $ wait
-#if rhg dirstate-v2-append pre-some-read
-  $ hg log -GT '{node|short} {desc}\n'
-  @  9a86dcbfb938 more files to have two commit
-  |
-  o  4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append
-  
-  $ hg status
-  A dir/o
-  R dir/nested/m
-  ! dir/i
-  ! dir/j
-  ! dir/nested/h
-  ! dir2/k
-  ! dir2/l
-  ! g
-  ? dir/n
-  ? p
-  ? q
-#else
   $ hg log -GT '{node|short} {desc}\n'
   o  9a86dcbfb938 more files to have two commit
   |
@@ -341,7 +322,6 @@
   ? dir/n
   ? p
   ? q
-#endif
 
 The status process should return a consistent result and not crash.
 
--- a/tests/test-dirstate-status-write-race.t	Mon Dec 12 17:08:12 2022 +0100
+++ b/tests/test-dirstate-status-write-race.t	Tue Feb 28 17:58:15 2023 +0100
@@ -242,12 +242,12 @@
 The file should in a "added" state
 
   $ hg status
-  A dir/n (no-rhg !)
-  A dir/n (rhg dirstate-v2-rewrite !)
+  A dir/n (no-rhg dirstate-v1 !)
+  A dir/n (no-dirstate-v1 !)
   A dir/n (missing-correct-output rhg dirstate-v1 !)
   A dir/o
   R dir/nested/m
-  ? dir/n (known-bad-output rhg no-dirstate-v2-rewrite !)
+  ? dir/n (known-bad-output rhg dirstate-v1 !)
   ? p
   ? q
 
@@ -260,7 +260,6 @@
   ? p
   ? q
   $ cat $TESTTMP/status-race-lock.log
-  abort: when writing $TESTTMP/race-with-add/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !)
 
 final cleanup
 
@@ -291,20 +290,7 @@
 The parent must change and the status should be clean
 
 # XXX rhg misbehaves here
-#if no-rhg
-  $ hg summary
-  parent: 2:2e3b442a2fd4 tip
-   created-during-status
-  branch: default
-  commit: 1 removed, 3 unknown
-  update: (current)
-  phases: 3 draft
-  $ hg status
-  R dir/nested/m
-  ? dir/n
-  ? p
-  ? q
-#else
+#if rhg dirstate-v1
   $ hg summary
   parent: 1:c349430a1631 
    more files to have two commits
@@ -318,6 +304,19 @@
   ? dir/n
   ? p
   ? q
+#else
+  $ hg summary
+  parent: 2:2e3b442a2fd4 tip
+   created-during-status
+  branch: default
+  commit: 1 removed, 3 unknown
+  update: (current)
+  phases: 3 draft
+  $ hg status
+  R dir/nested/m
+  ? dir/n
+  ? p
+  ? q
 #endif
 
 The status process should return a consistent result and not crash.
@@ -329,7 +328,6 @@
   ? p
   ? q
   $ cat $TESTTMP/status-race-lock.log
-  abort: when removing $TESTTMP/race-with-commit/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !)
 
 final cleanup
 
@@ -418,9 +416,9 @@
 the first update should be on disk
 
   $ hg debugstate --all | grep "g"
+  n 644          0 2000-01-01 00:10:00 g (known-bad-output rhg dirstate-v1 !)
+  n 644          0 2000-01-01 00:25:00 g (rhg no-dirstate-v1 !)
   n 644          0 2000-01-01 00:25:00 g (no-rhg !)
-  n 644          0 2000-01-01 00:25:00 g (missing-correct-output rhg !)
-  n 644          0 2000-01-01 00:10:00 g (known-bad-output rhg !)
 
 The status process should return a consistent result and not crash.
 
@@ -431,7 +429,6 @@
   ? p
   ? q
   $ cat $TESTTMP/status-race-lock.log
-  abort: when removing $TESTTMP/race-with-status/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !)
 
 final cleanup