rust-dirstate-v2: don't write dirstate if data file has changed
This fixes the following race:
- process A reads the dirstate
- process B reads and writes the dirstate
- process A writes the dirstate
This either resulted in losing what process B had just written or a crash
because the `uuid` had changed and we were trying to write to a file that
doesn't exist. More explanations inside.
This doesn't fix the issue for dirstate-v1, a later patch addresses it.
--- a/rust/hg-core/src/repo.rs Mon Dec 12 17:08:12 2022 +0100
+++ b/rust/hg-core/src/repo.rs Tue Feb 28 17:58:15 2023 +0100
@@ -34,7 +34,6 @@
requirements: HashSet<String>,
config: Config,
dirstate_parents: LazyCell<DirstateParents>,
- dirstate_data_file_uuid: LazyCell<Option<Vec<u8>>>,
dirstate_map: LazyCell<OwningDirstateMap>,
changelog: LazyCell<Changelog>,
manifestlog: LazyCell<Manifestlog>,
@@ -187,7 +186,6 @@
dot_hg,
config: repo_config,
dirstate_parents: LazyCell::new(),
- dirstate_data_file_uuid: LazyCell::new(),
dirstate_map: LazyCell::new(),
changelog: LazyCell::new(),
manifestlog: LazyCell::new(),
@@ -270,15 +268,10 @@
fn read_dirstate_parents(&self) -> Result<DirstateParents, HgError> {
let dirstate = self.dirstate_file_contents()?;
let parents = if dirstate.is_empty() {
- if self.has_dirstate_v2() {
- self.dirstate_data_file_uuid.set(None);
- }
DirstateParents::NULL
} else if self.has_dirstate_v2() {
let docket =
crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
- self.dirstate_data_file_uuid
- .set(Some(docket.uuid.to_owned()));
docket.parents()
} else {
crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
@@ -288,9 +281,13 @@
Ok(parents)
}
- fn read_dirstate_data_file_uuid(
+ /// Returns the information read from the dirstate docket necessary to
+ /// check if the data file has been updated/deleted by another process
+ /// since we last read the dirstate.
+ /// Namely, the data file uuid and the data size.
+ fn get_dirstate_data_file_integrity(
&self,
- ) -> Result<Option<Vec<u8>>, HgError> {
+ ) -> Result<(Option<Vec<u8>>, usize), HgError> {
assert!(
self.has_dirstate_v2(),
"accessing dirstate data file ID without dirstate-v2"
@@ -298,12 +295,12 @@
let dirstate = self.dirstate_file_contents()?;
if dirstate.is_empty() {
self.dirstate_parents.set(DirstateParents::NULL);
- Ok(None)
+ Ok((None, 0))
} else {
let docket =
crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
self.dirstate_parents.set(docket.parents());
- Ok(Some(docket.uuid.to_owned()))
+ Ok((Some(docket.uuid.to_owned()), docket.data_size()))
}
}
@@ -370,7 +367,6 @@
let dirstate_file_contents = self.dirstate_file_contents()?;
if dirstate_file_contents.is_empty() {
self.dirstate_parents.set(DirstateParents::NULL);
- self.dirstate_data_file_uuid.set(None);
return Ok(OwningDirstateMap::new_empty(Vec::new()));
}
let docket = crate::dirstate_tree::on_disk::read_docket(
@@ -381,8 +377,6 @@
"dirstate.post-docket-read-file",
);
self.dirstate_parents.set(docket.parents());
- self.dirstate_data_file_uuid
- .set(Some(docket.uuid.to_owned()));
let uuid = docket.uuid.to_owned();
let data_size = docket.data_size();
@@ -540,10 +534,27 @@
// it’s unset
let parents = self.dirstate_parents()?;
let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
- let uuid_opt = self
- .dirstate_data_file_uuid
- .get_or_init(|| self.read_dirstate_data_file_uuid())?;
- let uuid_opt = uuid_opt.as_ref();
+ let (uuid, data_size) = self.get_dirstate_data_file_integrity()?;
+ let uuid_changed = uuid.as_deref() != map.old_uuid();
+ let data_length_changed = data_size != map.old_data_size();
+
+ if uuid_changed || data_length_changed {
+ // If uuid or length changed since last disk read, don't write.
+ // This is fine because either we're in a command that doesn't
+ // write anything too important (like `hg status`), or we're in
+ // `hg add` and we're supposed to have taken the lock before
+ // reading anyway.
+ //
+ // TODO complain loudly if we've changed anything important
+ // without taking the lock.
+ // (see `hg help config.format.use-dirstate-tracked-hint`)
+ log::debug!(
+ "dirstate has changed since last read, not updating."
+ );
+ return Ok(());
+ }
+
+ let uuid_opt = map.old_uuid();
let write_mode = if uuid_opt.is_some() {
DirstateMapWriteMode::Auto
} else {
--- a/tests/test-dirstate-read-race.t Mon Dec 12 17:08:12 2022 +0100
+++ b/tests/test-dirstate-read-race.t Tue Feb 28 17:58:15 2023 +0100
@@ -312,25 +312,6 @@
0 files updated, 0 files merged, 6 files removed, 0 files unresolved
$ touch $TESTTMP/status-race-lock
$ wait
-#if rhg dirstate-v2-append pre-some-read
- $ hg log -GT '{node|short} {desc}\n'
- @ 9a86dcbfb938 more files to have two commit
- |
- o 4f23db756b09 recreate a bunch of files to facilitate dirstate-v2 append
-
- $ hg status
- A dir/o
- R dir/nested/m
- ! dir/i
- ! dir/j
- ! dir/nested/h
- ! dir2/k
- ! dir2/l
- ! g
- ? dir/n
- ? p
- ? q
-#else
$ hg log -GT '{node|short} {desc}\n'
o 9a86dcbfb938 more files to have two commit
|
@@ -341,7 +322,6 @@
? dir/n
? p
? q
-#endif
The status process should return a consistent result and not crash.
--- a/tests/test-dirstate-status-write-race.t Mon Dec 12 17:08:12 2022 +0100
+++ b/tests/test-dirstate-status-write-race.t Tue Feb 28 17:58:15 2023 +0100
@@ -242,12 +242,12 @@
The file should in a "added" state
$ hg status
- A dir/n (no-rhg !)
- A dir/n (rhg dirstate-v2-rewrite !)
+ A dir/n (no-rhg dirstate-v1 !)
+ A dir/n (no-dirstate-v1 !)
A dir/n (missing-correct-output rhg dirstate-v1 !)
A dir/o
R dir/nested/m
- ? dir/n (known-bad-output rhg no-dirstate-v2-rewrite !)
+ ? dir/n (known-bad-output rhg dirstate-v1 !)
? p
? q
@@ -260,7 +260,6 @@
? p
? q
$ cat $TESTTMP/status-race-lock.log
- abort: when writing $TESTTMP/race-with-add/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !)
final cleanup
@@ -291,20 +290,7 @@
The parent must change and the status should be clean
# XXX rhg misbehaves here
-#if no-rhg
- $ hg summary
- parent: 2:2e3b442a2fd4 tip
- created-during-status
- branch: default
- commit: 1 removed, 3 unknown
- update: (current)
- phases: 3 draft
- $ hg status
- R dir/nested/m
- ? dir/n
- ? p
- ? q
-#else
+#if rhg dirstate-v1
$ hg summary
parent: 1:c349430a1631
more files to have two commits
@@ -318,6 +304,19 @@
? dir/n
? p
? q
+#else
+ $ hg summary
+ parent: 2:2e3b442a2fd4 tip
+ created-during-status
+ branch: default
+ commit: 1 removed, 3 unknown
+ update: (current)
+ phases: 3 draft
+ $ hg status
+ R dir/nested/m
+ ? dir/n
+ ? p
+ ? q
#endif
The status process should return a consistent result and not crash.
@@ -329,7 +328,6 @@
? p
? q
$ cat $TESTTMP/status-race-lock.log
- abort: when removing $TESTTMP/race-with-commit/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !)
final cleanup
@@ -418,9 +416,9 @@
the first update should be on disk
$ hg debugstate --all | grep "g"
+ n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg dirstate-v1 !)
+ n 644 0 2000-01-01 00:25:00 g (rhg no-dirstate-v1 !)
n 644 0 2000-01-01 00:25:00 g (no-rhg !)
- n 644 0 2000-01-01 00:25:00 g (missing-correct-output rhg !)
- n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg !)
The status process should return a consistent result and not crash.
@@ -431,7 +429,6 @@
? p
? q
$ cat $TESTTMP/status-race-lock.log
- abort: when removing $TESTTMP/race-with-status/.hg/dirstate.*: $ENOENT$ (glob) (known-bad-output rhg dirstate-v2-rewrite !)
final cleanup