rhg: remember the inode of .hg/dirstate
This allows us to detect changes of `.hg/dirstate`, which is either the
full dirstate (in dirstate-v1) or the docket file (v2) without relying on
data inside the file. It only works on UNIX systems.
This fixes a race condition for dirstate-v1 (as demonstrated by
the test changes) and adds a confortable layer of sanity for dirstate-v2.
--- a/mercurial/dirstatemap.py Tue Feb 28 17:58:15 2023 +0100
+++ b/mercurial/dirstatemap.py Wed Mar 01 16:48:09 2023 +0100
@@ -570,6 +570,12 @@
testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
if self._use_dirstate_v2:
self.docket # load the data if needed
+ inode = (
+ self.identity.stat.st_ino
+ if self.identity is not None
+ and self.identity.stat is not None
+ else None
+ )
testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
if not self.docket.uuid:
data = b''
@@ -581,12 +587,19 @@
self.docket.data_size,
self.docket.tree_metadata,
self.docket.uuid,
+ inode,
)
parents = self.docket.parents
else:
self._set_identity()
+ inode = (
+ self.identity.stat.st_ino
+ if self.identity is not None
+ and self.identity.stat is not None
+ else None
+ )
self._map, parents = rustmod.DirstateMap.new_v1(
- self._readdirstatefile()
+ self._readdirstatefile(), inode
)
if parents and not self._dirtyparents:
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Feb 28 17:58:15 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed Mar 01 16:48:09 2023 +0100
@@ -76,6 +76,14 @@
/// Can be `None` if using dirstate v1 or if it's a brand new dirstate.
pub(super) old_uuid: Option<Vec<u8>>,
+ /// Identity of the dirstate file (for dirstate-v1) or the docket file
+ /// (v2). Used to detect if the file has changed from another process.
+ /// Since it's always written atomically, we can compare the inode to
+ /// check the file identity.
+ ///
+ /// TODO On non-Unix systems, something like hashing is a possibility?
+ pub(super) identity: Option<u64>,
+
pub(super) dirstate_version: DirstateVersion,
/// Controlled by config option `devel.dirstate.v2.data_update_mode`
@@ -468,6 +476,7 @@
unreachable_bytes: 0,
old_data_size: 0,
old_uuid: None,
+ identity: None,
dirstate_version: DirstateVersion::V1,
write_mode: DirstateMapWriteMode::Auto,
}
@@ -479,9 +488,10 @@
data_size: usize,
metadata: &[u8],
uuid: Vec<u8>,
+ identity: Option<u64>,
) -> Result<Self, DirstateError> {
if let Some(data) = on_disk.get(..data_size) {
- Ok(on_disk::read(data, metadata, uuid)?)
+ Ok(on_disk::read(data, metadata, uuid, identity)?)
} else {
Err(DirstateV2ParseError::new("not enough bytes on disk").into())
}
@@ -490,6 +500,7 @@
#[timed]
pub fn new_v1(
on_disk: &'on_disk [u8],
+ identity: Option<u64>,
) -> Result<(Self, Option<DirstateParents>), DirstateError> {
let mut map = Self::empty(on_disk);
if map.on_disk.is_empty() {
@@ -531,6 +542,7 @@
},
)?;
let parents = Some(parents.clone());
+ map.identity = identity;
Ok((map, parents))
}
@@ -1853,6 +1865,7 @@
packed_len,
metadata.as_bytes(),
vec![],
+ None,
)?;
// Check that everything is accounted for
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Feb 28 17:58:15 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Wed Mar 01 16:48:09 2023 +0100
@@ -291,6 +291,7 @@
on_disk: &'on_disk [u8],
metadata: &[u8],
uuid: Vec<u8>,
+ identity: Option<u64>,
) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
if on_disk.is_empty() {
let mut map = DirstateMap::empty(on_disk);
@@ -314,6 +315,7 @@
unreachable_bytes: meta.unreachable_bytes.get(),
old_data_size: on_disk.len(),
old_uuid: Some(uuid),
+ identity,
dirstate_version: DirstateVersion::V2,
write_mode: DirstateMapWriteMode::Auto,
};
--- a/rust/hg-core/src/dirstate_tree/owning.rs Tue Feb 28 17:58:15 2023 +0100
+++ b/rust/hg-core/src/dirstate_tree/owning.rs Wed Mar 01 16:48:09 2023 +0100
@@ -31,6 +31,7 @@
pub fn new_v1<OnDisk>(
on_disk: OnDisk,
+ identity: Option<u64>,
) -> Result<(Self, DirstateParents), DirstateError>
where
OnDisk: Deref<Target = [u8]> + Send + 'static,
@@ -42,7 +43,7 @@
OwningDirstateMapTryBuilder {
on_disk,
map_builder: |bytes| {
- DirstateMap::new_v1(&bytes).map(|(dmap, p)| {
+ DirstateMap::new_v1(&bytes, identity).map(|(dmap, p)| {
parents = p.unwrap_or(DirstateParents::NULL);
dmap
})
@@ -58,6 +59,7 @@
data_size: usize,
metadata: &[u8],
uuid: Vec<u8>,
+ identity: Option<u64>,
) -> Result<Self, DirstateError>
where
OnDisk: Deref<Target = [u8]> + Send + 'static,
@@ -67,7 +69,9 @@
OwningDirstateMapTryBuilder {
on_disk,
map_builder: |bytes| {
- DirstateMap::new_v2(&bytes, data_size, metadata, uuid)
+ DirstateMap::new_v2(
+ &bytes, data_size, metadata, uuid, identity,
+ )
},
}
.try_build()
@@ -92,6 +96,10 @@
self.get_map().old_uuid.as_deref()
}
+ pub fn old_identity(&self) -> Option<u64> {
+ self.get_map().identity
+ }
+
pub fn old_data_size(&self) -> usize {
self.get_map().old_data_size
}
--- a/rust/hg-core/src/repo.rs Tue Feb 28 17:58:15 2023 +0100
+++ b/rust/hg-core/src/repo.rs Wed Mar 01 16:48:09 2023 +0100
@@ -259,6 +259,15 @@
.unwrap_or(Vec::new()))
}
+ fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
+ use std::os::unix::fs::MetadataExt;
+ Ok(self
+ .hg_vfs()
+ .symlink_metadata("dirstate")
+ .io_not_found_as_none()?
+ .map(|meta| meta.ino()))
+ }
+
pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
Ok(*self
.dirstate_parents
@@ -284,23 +293,27 @@
/// Returns the information read from the dirstate docket necessary to
/// check if the data file has been updated/deleted by another process
/// since we last read the dirstate.
- /// Namely, the data file uuid and the data size.
+ /// Namely, the inode, data file uuid and the data size.
fn get_dirstate_data_file_integrity(
&self,
- ) -> Result<(Option<Vec<u8>>, usize), HgError> {
+ ) -> Result<(Option<u64>, Option<Vec<u8>>, usize), HgError> {
assert!(
self.has_dirstate_v2(),
"accessing dirstate data file ID without dirstate-v2"
);
+ // Get the identity before the contents since we could have a race
+ // between the two. Having an identity that is too old is fine, but
+ // one that is younger than the content change is bad.
+ let identity = self.dirstate_identity()?;
let dirstate = self.dirstate_file_contents()?;
if dirstate.is_empty() {
self.dirstate_parents.set(DirstateParents::NULL);
- Ok((None, 0))
+ Ok((identity, None, 0))
} else {
let docket =
crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
self.dirstate_parents.set(docket.parents());
- Ok((Some(docket.uuid.to_owned()), docket.data_size()))
+ Ok((identity, Some(docket.uuid.to_owned()), docket.data_size()))
}
}
@@ -347,13 +360,16 @@
self.config(),
"dirstate.pre-read-file",
);
+ let identity = self.dirstate_identity()?;
let dirstate_file_contents = self.dirstate_file_contents()?;
return if dirstate_file_contents.is_empty() {
self.dirstate_parents.set(DirstateParents::NULL);
Ok(OwningDirstateMap::new_empty(Vec::new()))
} else {
- let (map, parents) =
- OwningDirstateMap::new_v1(dirstate_file_contents)?;
+ let (map, parents) = OwningDirstateMap::new_v1(
+ dirstate_file_contents,
+ identity,
+ )?;
self.dirstate_parents.set(parents);
Ok(map)
};
@@ -365,6 +381,7 @@
) -> Result<OwningDirstateMap, DirstateError> {
debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
let dirstate_file_contents = self.dirstate_file_contents()?;
+ let identity = self.dirstate_identity()?;
if dirstate_file_contents.is_empty() {
self.dirstate_parents.set(DirstateParents::NULL);
return Ok(OwningDirstateMap::new_empty(Vec::new()));
@@ -410,7 +427,9 @@
}
Err(e) => return Err(e.into()),
};
- OwningDirstateMap::new_v2(contents, data_size, metadata, uuid)
+ OwningDirstateMap::new_v2(
+ contents, data_size, metadata, uuid, identity,
+ )
} else {
match self
.hg_vfs()
@@ -418,7 +437,7 @@
.io_not_found_as_none()
{
Ok(Some(data_mmap)) => OwningDirstateMap::new_v2(
- data_mmap, data_size, metadata, uuid,
+ data_mmap, data_size, metadata, uuid, identity,
),
Ok(None) => {
// Race where the data file was deleted right after we
@@ -534,12 +553,15 @@
// it’s unset
let parents = self.dirstate_parents()?;
let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() {
- let (uuid, data_size) = self.get_dirstate_data_file_integrity()?;
+ let (identity, uuid, data_size) =
+ self.get_dirstate_data_file_integrity()?;
+ let identity_changed = identity != map.old_identity();
let uuid_changed = uuid.as_deref() != map.old_uuid();
let data_length_changed = data_size != map.old_data_size();
- if uuid_changed || data_length_changed {
- // If uuid or length changed since last disk read, don't write.
+ if identity_changed || uuid_changed || data_length_changed {
+ // If any of identity, uuid or length have changed since
+ // last disk read, don't write.
// This is fine because either we're in a command that doesn't
// write anything too important (like `hg status`), or we're in
// `hg add` and we're supposed to have taken the lock before
@@ -636,6 +658,22 @@
(packed_dirstate, old_uuid)
} else {
+ let identity = self.dirstate_identity()?;
+ if identity != map.old_identity() {
+ // If identity changed since last disk read, don't write.
+ // This is fine because either we're in a command that doesn't
+ // write anything too important (like `hg status`), or we're in
+ // `hg add` and we're supposed to have taken the lock before
+ // reading anyway.
+ //
+ // TODO complain loudly if we've changed anything important
+ // without taking the lock.
+ // (see `hg help config.format.use-dirstate-tracked-hint`)
+ log::debug!(
+ "dirstate has changed since last read, not updating."
+ );
+ return Ok(());
+ }
(map.pack_v1(parents)?, None)
};
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Tue Feb 28 17:58:15 2023 +0100
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed Mar 01 16:48:09 2023 +0100
@@ -49,9 +49,10 @@
@staticmethod
def new_v1(
on_disk: PyBytes,
+ identity: Option<u64>,
) -> PyResult<PyObject> {
let on_disk = PyBytesDeref::new(py, on_disk);
- let (map, parents) = OwningDirstateMap::new_v1(on_disk)
+ let (map, parents) = OwningDirstateMap::new_v1(on_disk, identity)
.map_err(|e| dirstate_error(py, e))?;
let map = Self::create_instance(py, map)?;
let p1 = PyBytes::new(py, parents.p1.as_bytes());
@@ -67,6 +68,7 @@
data_size: usize,
tree_metadata: PyBytes,
uuid: PyBytes,
+ identity: Option<u64>,
) -> PyResult<PyObject> {
let dirstate_error = |e: DirstateError| {
PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
@@ -74,7 +76,11 @@
let on_disk = PyBytesDeref::new(py, on_disk);
let uuid = uuid.data(py);
let map = OwningDirstateMap::new_v2(
- on_disk, data_size, tree_metadata.data(py), uuid.to_owned(),
+ on_disk,
+ data_size,
+ tree_metadata.data(py),
+ uuid.to_owned(),
+ identity,
).map_err(dirstate_error)?;
let map = Self::create_instance(py, map)?;
Ok(map.into_object())
--- a/tests/test-dirstate-status-write-race.t Tue Feb 28 17:58:15 2023 +0100
+++ b/tests/test-dirstate-status-write-race.t Wed Mar 01 16:48:09 2023 +0100
@@ -242,12 +242,9 @@
The file should in a "added" state
$ hg status
- A dir/n (no-rhg dirstate-v1 !)
- A dir/n (no-dirstate-v1 !)
- A dir/n (missing-correct-output rhg dirstate-v1 !)
+ A dir/n
A dir/o
R dir/nested/m
- ? dir/n (known-bad-output rhg dirstate-v1 !)
? p
? q
@@ -289,22 +286,6 @@
The parent must change and the status should be clean
-# XXX rhg misbehaves here
-#if rhg dirstate-v1
- $ hg summary
- parent: 1:c349430a1631
- more files to have two commits
- branch: default
- commit: 1 added, 1 removed, 3 unknown (new branch head)
- update: 1 new changesets (update)
- phases: 3 draft
- $ hg status
- A dir/o
- R dir/nested/m
- ? dir/n
- ? p
- ? q
-#else
$ hg summary
parent: 2:2e3b442a2fd4 tip
created-during-status
@@ -317,7 +298,6 @@
? dir/n
? p
? q
-#endif
The status process should return a consistent result and not crash.
@@ -416,9 +396,7 @@
the first update should be on disk
$ hg debugstate --all | grep "g"
- n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg dirstate-v1 !)
- n 644 0 2000-01-01 00:25:00 g (rhg no-dirstate-v1 !)
- n 644 0 2000-01-01 00:25:00 g (no-rhg !)
+ n 644 0 2000-01-01 00:25:00 g
The status process should return a consistent result and not crash.