# HG changeset patch # User Raphaël Gomès # Date 1677603495 -3600 # Node ID 07d030b38097d7c1ecf4fb39e7ffe14c721705d9 # Parent 6cce0afc1454b72817cb2439331803d0a9818632 rust-dirstate-v2: don't write dirstate if data file has changed This fixes the following race: - process A reads the dirstate - process B reads and writes the dirstate - process A writes the dirstate This either resulted in losing what process B had just written or a crash because the `uuid` had changed and we were trying to write to a file that doesn't exist. More explanations inside. This doesn't fix the issue for dirstate-v1, a later patch addresses it. diff -r 6cce0afc1454 -r 07d030b38097 rust/hg-core/src/repo.rs --- a/rust/hg-core/src/repo.rs Mon Dec 12 17:08:12 2022 +0100 +++ b/rust/hg-core/src/repo.rs Tue Feb 28 17:58:15 2023 +0100 @@ -34,7 +34,6 @@ requirements: HashSet, config: Config, dirstate_parents: LazyCell, - dirstate_data_file_uuid: LazyCell>>, dirstate_map: LazyCell, changelog: LazyCell, manifestlog: LazyCell, @@ -187,7 +186,6 @@ dot_hg, config: repo_config, dirstate_parents: LazyCell::new(), - dirstate_data_file_uuid: LazyCell::new(), dirstate_map: LazyCell::new(), changelog: LazyCell::new(), manifestlog: LazyCell::new(), @@ -270,15 +268,10 @@ fn read_dirstate_parents(&self) -> Result { let dirstate = self.dirstate_file_contents()?; let parents = if dirstate.is_empty() { - if self.has_dirstate_v2() { - self.dirstate_data_file_uuid.set(None); - } DirstateParents::NULL } else if self.has_dirstate_v2() { let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?; - self.dirstate_data_file_uuid - .set(Some(docket.uuid.to_owned())); docket.parents() } else { crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? @@ -288,9 +281,13 @@ Ok(parents) } - fn read_dirstate_data_file_uuid( + /// Returns the information read from the dirstate docket necessary to + /// check if the data file has been updated/deleted by another process + /// since we last read the dirstate. + /// Namely, the data file uuid and the data size. + fn get_dirstate_data_file_integrity( &self, - ) -> Result>, HgError> { + ) -> Result<(Option>, usize), HgError> { assert!( self.has_dirstate_v2(), "accessing dirstate data file ID without dirstate-v2" @@ -298,12 +295,12 @@ let dirstate = self.dirstate_file_contents()?; if dirstate.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); - Ok(None) + Ok((None, 0)) } else { let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?; self.dirstate_parents.set(docket.parents()); - Ok(Some(docket.uuid.to_owned())) + Ok((Some(docket.uuid.to_owned()), docket.data_size())) } } @@ -370,7 +367,6 @@ let dirstate_file_contents = self.dirstate_file_contents()?; if dirstate_file_contents.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); - self.dirstate_data_file_uuid.set(None); return Ok(OwningDirstateMap::new_empty(Vec::new())); } let docket = crate::dirstate_tree::on_disk::read_docket( @@ -381,8 +377,6 @@ "dirstate.post-docket-read-file", ); self.dirstate_parents.set(docket.parents()); - self.dirstate_data_file_uuid - .set(Some(docket.uuid.to_owned())); let uuid = docket.uuid.to_owned(); let data_size = docket.data_size(); @@ -540,10 +534,27 @@ // it’s unset let parents = self.dirstate_parents()?; let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() { - let uuid_opt = self - .dirstate_data_file_uuid - .get_or_init(|| self.read_dirstate_data_file_uuid())?; - let uuid_opt = uuid_opt.as_ref(); + let (uuid, data_size) = self.get_dirstate_data_file_integrity()?; + let uuid_changed = uuid.as_deref() != map.old_uuid(); + let data_length_changed = data_size != map.old_data_size(); + + if uuid_changed || data_length_changed { + // If uuid or length changed since last disk read, don't write. + // This is fine because either we're in a command that doesn't + // write anything too important (like `hg status`), or we're in + // `hg add` and we're supposed to have taken the lock before + // reading anyway. + // + // TODO complain loudly if we've changed anything important + // without taking the lock. + // (see `hg help config.format.use-dirstate-tracked-hint`) + log::debug!( + "dirstate has changed since last read, not updating." + ); + return Ok(()); + } + + let uuid_opt = map.old_uuid(); let write_mode = if uuid_opt.is_some() { DirstateMapWriteMode::Auto } else { diff -r 6cce0afc1454 -r 07d030b38097 tests/test-dirstate-read-race.t --- a/tests/test-dirstate-read-race.t Mon Dec 12 17:08:12 2022 +0100 +++ b/tests/test-dirstate-read-race.t Tue Feb 28 17:58:15 2023 +0100 @@ -312,25 +312,6 @@ 0 files updated, 0 files merged, 6 files removed, 0 files unresolved $ touch $TESTTMP/status-race-lock $ wait -#if rhg dirstate-v2-append pre-some-read - $ hg log -GT '{node|short} {desc}\n' - @ 9a86dcbfb938 more files to have two commit - | - o 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append - - $ hg status - A dir/o - R dir/nested/m - ! dir/i - ! dir/j - ! dir/nested/h - ! dir2/k - ! dir2/l - ! g - ? dir/n - ? p - ? q -#else $ hg log -GT '{node|short} {desc}\n' o 9a86dcbfb938 more files to have two commit | @@ -341,7 +322,6 @@ ? dir/n ? p ? q -#endif The status process should return a consistent result and not crash. diff -r 6cce0afc1454 -r 07d030b38097 tests/test-dirstate-status-write-race.t --- a/tests/test-dirstate-status-write-race.t Mon Dec 12 17:08:12 2022 +0100 +++ b/tests/test-dirstate-status-write-race.t Tue Feb 28 17:58:15 2023 +0100 @@ -242,12 +242,12 @@ The file should in a "added" state $ hg status - A dir/n (no-rhg !) - A dir/n (rhg dirstate-v2-rewrite !) + A dir/n (no-rhg dirstate-v1 !) + A dir/n (no-dirstate-v1 !) A dir/n (missing-correct-output rhg dirstate-v1 !) A dir/o R dir/nested/m - ? dir/n (known-bad-output rhg no-dirstate-v2-rewrite !) + ? dir/n (known-bad-output rhg dirstate-v1 !) ? p ? q @@ -260,7 +260,6 @@ ? p ? q $ cat $TESTTMP/status-race-lock.log - abort: when writing $TESTTMP/race-with-add/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !) final cleanup @@ -291,20 +290,7 @@ The parent must change and the status should be clean # XXX rhg misbehaves here -#if no-rhg - $ hg summary - parent: 2:2e3b442a2fd4 tip - created-during-status - branch: default - commit: 1 removed, 3 unknown - update: (current) - phases: 3 draft - $ hg status - R dir/nested/m - ? dir/n - ? p - ? q -#else +#if rhg dirstate-v1 $ hg summary parent: 1:c349430a1631 more files to have two commits @@ -318,6 +304,19 @@ ? dir/n ? p ? q +#else + $ hg summary + parent: 2:2e3b442a2fd4 tip + created-during-status + branch: default + commit: 1 removed, 3 unknown + update: (current) + phases: 3 draft + $ hg status + R dir/nested/m + ? dir/n + ? p + ? q #endif The status process should return a consistent result and not crash. @@ -329,7 +328,6 @@ ? p ? q $ cat $TESTTMP/status-race-lock.log - abort: when removing $TESTTMP/race-with-commit/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !) final cleanup @@ -418,9 +416,9 @@ the first update should be on disk $ hg debugstate --all | grep "g" + n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg dirstate-v1 !) + n 644 0 2000-01-01 00:25:00 g (rhg no-dirstate-v1 !) n 644 0 2000-01-01 00:25:00 g (no-rhg !) - n 644 0 2000-01-01 00:25:00 g (missing-correct-output rhg !) - n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg !) The status process should return a consistent result and not crash. @@ -431,7 +429,6 @@ ? p ? q $ cat $TESTTMP/status-race-lock.log - abort: when removing $TESTTMP/race-with-status/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !) final cleanup