Mercurial > hg
changeset 50244:07d030b38097 stable
rust-dirstate-v2: don't write dirstate if data file has changed
This fixes the following race:
- process A reads the dirstate
- process B reads and writes the dirstate
- process A writes the dirstate
This either resulted in losing what process B had just written or a crash
because the `uuid` had changed and we were trying to write to a file that
doesn't exist. More explanations inside.
This doesn't fix the issue for dirstate-v1, a later patch addresses it.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Tue, 28 Feb 2023 17:58:15 +0100 |
parents | 6cce0afc1454 |
children | dbe09fb038fc |
files | rust/hg-core/src/repo.rs tests/test-dirstate-read-race.t tests/test-dirstate-status-write-race.t |
diffstat | 3 files changed, 48 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/repo.rs Mon Dec 12 17:08:12 2022 +0100 +++ b/rust/hg-core/src/repo.rs Tue Feb 28 17:58:15 2023 +0100 @@ -34,7 +34,6 @@ requirements: HashSet<String>, config: Config, dirstate_parents: LazyCell<DirstateParents>, - dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>, dirstate_map: LazyCell<OwningDirstateMap>, changelog: LazyCell<Changelog>, manifestlog: LazyCell<Manifestlog>, @@ -187,7 +186,6 @@ dot_hg, config: repo_config, dirstate_parents: LazyCell::new(), - dirstate_data_file_uuid: LazyCell::new(), dirstate_map: LazyCell::new(), changelog: LazyCell::new(), manifestlog: LazyCell::new(), @@ -270,15 +268,10 @@ fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> { let dirstate = self.dirstate_file_contents()?; let parents = if dirstate.is_empty() { - if self.has_dirstate_v2() { - self.dirstate_data_file_uuid.set(None); - } DirstateParents::NULL } else if self.has_dirstate_v2() { let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?; - self.dirstate_data_file_uuid - .set(Some(docket.uuid.to_owned())); docket.parents() } else { crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? @@ -288,9 +281,13 @@ Ok(parents) } - fn read_dirstate_data_file_uuid( + /// Returns the information read from the dirstate docket necessary to + /// check if the data file has been updated/deleted by another process + /// since we last read the dirstate. + /// Namely, the data file uuid and the data size. + fn get_dirstate_data_file_integrity( &self, - ) -> Result<Option<Vec<u8>>, HgError> { + ) -> Result<(Option<Vec<u8>>, usize), HgError> { assert!( self.has_dirstate_v2(), "accessing dirstate data file ID without dirstate-v2" @@ -298,12 +295,12 @@ let dirstate = self.dirstate_file_contents()?; if dirstate.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); - Ok(None) + Ok((None, 0)) } else { let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?; self.dirstate_parents.set(docket.parents()); - Ok(Some(docket.uuid.to_owned())) + Ok((Some(docket.uuid.to_owned()), docket.data_size())) } } @@ -370,7 +367,6 @@ let dirstate_file_contents = self.dirstate_file_contents()?; if dirstate_file_contents.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); - self.dirstate_data_file_uuid.set(None); return Ok(OwningDirstateMap::new_empty(Vec::new())); } let docket = crate::dirstate_tree::on_disk::read_docket( @@ -381,8 +377,6 @@ "dirstate.post-docket-read-file", ); self.dirstate_parents.set(docket.parents()); - self.dirstate_data_file_uuid - .set(Some(docket.uuid.to_owned())); let uuid = docket.uuid.to_owned(); let data_size = docket.data_size(); @@ -540,10 +534,27 @@ // it’s unset let parents = self.dirstate_parents()?; let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() { - let uuid_opt = self - .dirstate_data_file_uuid - .get_or_init(|| self.read_dirstate_data_file_uuid())?; - let uuid_opt = uuid_opt.as_ref(); + let (uuid, data_size) = self.get_dirstate_data_file_integrity()?; + let uuid_changed = uuid.as_deref() != map.old_uuid(); + let data_length_changed = data_size != map.old_data_size(); + + if uuid_changed || data_length_changed { + // If uuid or length changed since last disk read, don't write. + // This is fine because either we're in a command that doesn't + // write anything too important (like `hg status`), or we're in + // `hg add` and we're supposed to have taken the lock before + // reading anyway. + // + // TODO complain loudly if we've changed anything important + // without taking the lock. + // (see `hg help config.format.use-dirstate-tracked-hint`) + log::debug!( + "dirstate has changed since last read, not updating." + ); + return Ok(()); + } + + let uuid_opt = map.old_uuid(); let write_mode = if uuid_opt.is_some() { DirstateMapWriteMode::Auto } else {
--- a/tests/test-dirstate-read-race.t Mon Dec 12 17:08:12 2022 +0100 +++ b/tests/test-dirstate-read-race.t Tue Feb 28 17:58:15 2023 +0100 @@ -312,25 +312,6 @@ 0 files updated, 0 files merged, 6 files removed, 0 files unresolved $ touch $TESTTMP/status-race-lock $ wait -#if rhg dirstate-v2-append pre-some-read - $ hg log -GT '{node|short} {desc}\n' - @ 9a86dcbfb938 more files to have two commit - | - o 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append - - $ hg status - A dir/o - R dir/nested/m - ! dir/i - ! dir/j - ! dir/nested/h - ! dir2/k - ! dir2/l - ! g - ? dir/n - ? p - ? q -#else $ hg log -GT '{node|short} {desc}\n' o 9a86dcbfb938 more files to have two commit | @@ -341,7 +322,6 @@ ? dir/n ? p ? q -#endif The status process should return a consistent result and not crash.
--- a/tests/test-dirstate-status-write-race.t Mon Dec 12 17:08:12 2022 +0100 +++ b/tests/test-dirstate-status-write-race.t Tue Feb 28 17:58:15 2023 +0100 @@ -242,12 +242,12 @@ The file should in a "added" state $ hg status - A dir/n (no-rhg !) - A dir/n (rhg dirstate-v2-rewrite !) + A dir/n (no-rhg dirstate-v1 !) + A dir/n (no-dirstate-v1 !) A dir/n (missing-correct-output rhg dirstate-v1 !) A dir/o R dir/nested/m - ? dir/n (known-bad-output rhg no-dirstate-v2-rewrite !) + ? dir/n (known-bad-output rhg dirstate-v1 !) ? p ? q @@ -260,7 +260,6 @@ ? p ? q $ cat $TESTTMP/status-race-lock.log - abort: when writing $TESTTMP/race-with-add/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !) final cleanup @@ -291,20 +290,7 @@ The parent must change and the status should be clean # XXX rhg misbehaves here -#if no-rhg - $ hg summary - parent: 2:2e3b442a2fd4 tip - created-during-status - branch: default - commit: 1 removed, 3 unknown - update: (current) - phases: 3 draft - $ hg status - R dir/nested/m - ? dir/n - ? p - ? q -#else +#if rhg dirstate-v1 $ hg summary parent: 1:c349430a1631 more files to have two commits @@ -318,6 +304,19 @@ ? dir/n ? p ? q +#else + $ hg summary + parent: 2:2e3b442a2fd4 tip + created-during-status + branch: default + commit: 1 removed, 3 unknown + update: (current) + phases: 3 draft + $ hg status + R dir/nested/m + ? dir/n + ? p + ? q #endif The status process should return a consistent result and not crash. @@ -329,7 +328,6 @@ ? p ? q $ cat $TESTTMP/status-race-lock.log - abort: when removing $TESTTMP/race-with-commit/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !) final cleanup @@ -418,9 +416,9 @@ the first update should be on disk $ hg debugstate --all | grep "g" + n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg dirstate-v1 !) + n 644 0 2000-01-01 00:25:00 g (rhg no-dirstate-v1 !) n 644 0 2000-01-01 00:25:00 g (no-rhg !) - n 644 0 2000-01-01 00:25:00 g (missing-correct-output rhg !) - n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg !) The status process should return a consistent result and not crash. @@ -431,7 +429,6 @@ ? p ? q $ cat $TESTTMP/status-race-lock.log - abort: when removing $TESTTMP/race-with-status/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !) final cleanup