Mercurial > hg
changeset 50245:dbe09fb038fc stable
rhg: remember the inode of .hg/dirstate
This allows us to detect changes of `.hg/dirstate`, which is either the
full dirstate (in dirstate-v1) or the docket file (v2) without relying on
data inside the file. It only works on UNIX systems.
This fixes a race condition for dirstate-v1 (as demonstrated by
the test changes) and adds a confortable layer of sanity for dirstate-v2.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 01 Mar 2023 16:48:09 +0100 |
parents | 07d030b38097 |
children | a6b497872b97 |
files | mercurial/dirstatemap.py rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-core/src/dirstate_tree/owning.rs rust/hg-core/src/repo.rs rust/hg-cpython/src/dirstate/dirstate_map.rs tests/test-dirstate-status-write-race.t |
diffstat | 7 files changed, 99 insertions(+), 41 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/dirstatemap.py Tue Feb 28 17:58:15 2023 +0100 +++ b/mercurial/dirstatemap.py Wed Mar 01 16:48:09 2023 +0100 @@ -570,6 +570,12 @@ testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file') if self._use_dirstate_v2: self.docket # load the data if needed + inode = ( + self.identity.stat.st_ino + if self.identity is not None + and self.identity.stat is not None + else None + ) testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file') if not self.docket.uuid: data = b'' @@ -581,12 +587,19 @@ self.docket.data_size, self.docket.tree_metadata, self.docket.uuid, + inode, ) parents = self.docket.parents else: self._set_identity() + inode = ( + self.identity.stat.st_ino + if self.identity is not None + and self.identity.stat is not None + else None + ) self._map, parents = rustmod.DirstateMap.new_v1( - self._readdirstatefile() + self._readdirstatefile(), inode ) if parents and not self._dirtyparents:
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Feb 28 17:58:15 2023 +0100 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed Mar 01 16:48:09 2023 +0100 @@ -76,6 +76,14 @@ /// Can be `None` if using dirstate v1 or if it's a brand new dirstate. pub(super) old_uuid: Option<Vec<u8>>, + /// Identity of the dirstate file (for dirstate-v1) or the docket file + /// (v2). Used to detect if the file has changed from another process. + /// Since it's always written atomically, we can compare the inode to + /// check the file identity. + /// + /// TODO On non-Unix systems, something like hashing is a possibility? + pub(super) identity: Option<u64>, + pub(super) dirstate_version: DirstateVersion, /// Controlled by config option `devel.dirstate.v2.data_update_mode` @@ -468,6 +476,7 @@ unreachable_bytes: 0, old_data_size: 0, old_uuid: None, + identity: None, dirstate_version: DirstateVersion::V1, write_mode: DirstateMapWriteMode::Auto, } @@ -479,9 +488,10 @@ data_size: usize, metadata: &[u8], uuid: Vec<u8>, + identity: Option<u64>, ) -> Result<Self, DirstateError> { if let Some(data) = on_disk.get(..data_size) { - Ok(on_disk::read(data, metadata, uuid)?) + Ok(on_disk::read(data, metadata, uuid, identity)?) } else { Err(DirstateV2ParseError::new("not enough bytes on disk").into()) } @@ -490,6 +500,7 @@ #[timed] pub fn new_v1( on_disk: &'on_disk [u8], + identity: Option<u64>, ) -> Result<(Self, Option<DirstateParents>), DirstateError> { let mut map = Self::empty(on_disk); if map.on_disk.is_empty() { @@ -531,6 +542,7 @@ }, )?; let parents = Some(parents.clone()); + map.identity = identity; Ok((map, parents)) } @@ -1853,6 +1865,7 @@ packed_len, metadata.as_bytes(), vec![], + None, )?; // Check that everything is accounted for
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Feb 28 17:58:15 2023 +0100 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Wed Mar 01 16:48:09 2023 +0100 @@ -291,6 +291,7 @@ on_disk: &'on_disk [u8], metadata: &[u8], uuid: Vec<u8>, + identity: Option<u64>, ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> { if on_disk.is_empty() { let mut map = DirstateMap::empty(on_disk); @@ -314,6 +315,7 @@ unreachable_bytes: meta.unreachable_bytes.get(), old_data_size: on_disk.len(), old_uuid: Some(uuid), + identity, dirstate_version: DirstateVersion::V2, write_mode: DirstateMapWriteMode::Auto, };
--- a/rust/hg-core/src/dirstate_tree/owning.rs Tue Feb 28 17:58:15 2023 +0100 +++ b/rust/hg-core/src/dirstate_tree/owning.rs Wed Mar 01 16:48:09 2023 +0100 @@ -31,6 +31,7 @@ pub fn new_v1<OnDisk>( on_disk: OnDisk, + identity: Option<u64>, ) -> Result<(Self, DirstateParents), DirstateError> where OnDisk: Deref<Target = [u8]> + Send + 'static, @@ -42,7 +43,7 @@ OwningDirstateMapTryBuilder { on_disk, map_builder: |bytes| { - DirstateMap::new_v1(&bytes).map(|(dmap, p)| { + DirstateMap::new_v1(&bytes, identity).map(|(dmap, p)| { parents = p.unwrap_or(DirstateParents::NULL); dmap }) @@ -58,6 +59,7 @@ data_size: usize, metadata: &[u8], uuid: Vec<u8>, + identity: Option<u64>, ) -> Result<Self, DirstateError> where OnDisk: Deref<Target = [u8]> + Send + 'static, @@ -67,7 +69,9 @@ OwningDirstateMapTryBuilder { on_disk, map_builder: |bytes| { - DirstateMap::new_v2(&bytes, data_size, metadata, uuid) + DirstateMap::new_v2( + &bytes, data_size, metadata, uuid, identity, + ) }, } .try_build() @@ -92,6 +96,10 @@ self.get_map().old_uuid.as_deref() } + pub fn old_identity(&self) -> Option<u64> { + self.get_map().identity + } + pub fn old_data_size(&self) -> usize { self.get_map().old_data_size }
--- a/rust/hg-core/src/repo.rs Tue Feb 28 17:58:15 2023 +0100 +++ b/rust/hg-core/src/repo.rs Wed Mar 01 16:48:09 2023 +0100 @@ -259,6 +259,15 @@ .unwrap_or(Vec::new())) } + fn dirstate_identity(&self) -> Result<Option<u64>, HgError> { + use std::os::unix::fs::MetadataExt; + Ok(self + .hg_vfs() + .symlink_metadata("dirstate") + .io_not_found_as_none()? + .map(|meta| meta.ino())) + } + pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> { Ok(*self .dirstate_parents @@ -284,23 +293,27 @@ /// Returns the information read from the dirstate docket necessary to /// check if the data file has been updated/deleted by another process /// since we last read the dirstate. - /// Namely, the data file uuid and the data size. + /// Namely, the inode, data file uuid and the data size. fn get_dirstate_data_file_integrity( &self, - ) -> Result<(Option<Vec<u8>>, usize), HgError> { + ) -> Result<(Option<u64>, Option<Vec<u8>>, usize), HgError> { assert!( self.has_dirstate_v2(), "accessing dirstate data file ID without dirstate-v2" ); + // Get the identity before the contents since we could have a race + // between the two. Having an identity that is too old is fine, but + // one that is younger than the content change is bad. + let identity = self.dirstate_identity()?; let dirstate = self.dirstate_file_contents()?; if dirstate.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); - Ok((None, 0)) + Ok((identity, None, 0)) } else { let docket = crate::dirstate_tree::on_disk::read_docket(&dirstate)?; self.dirstate_parents.set(docket.parents()); - Ok((Some(docket.uuid.to_owned()), docket.data_size())) + Ok((identity, Some(docket.uuid.to_owned()), docket.data_size())) } } @@ -347,13 +360,16 @@ self.config(), "dirstate.pre-read-file", ); + let identity = self.dirstate_identity()?; let dirstate_file_contents = self.dirstate_file_contents()?; return if dirstate_file_contents.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); Ok(OwningDirstateMap::new_empty(Vec::new())) } else { - let (map, parents) = - OwningDirstateMap::new_v1(dirstate_file_contents)?; + let (map, parents) = OwningDirstateMap::new_v1( + dirstate_file_contents, + identity, + )?; self.dirstate_parents.set(parents); Ok(map) }; @@ -365,6 +381,7 @@ ) -> Result<OwningDirstateMap, DirstateError> { debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); let dirstate_file_contents = self.dirstate_file_contents()?; + let identity = self.dirstate_identity()?; if dirstate_file_contents.is_empty() { self.dirstate_parents.set(DirstateParents::NULL); return Ok(OwningDirstateMap::new_empty(Vec::new())); @@ -410,7 +427,9 @@ } Err(e) => return Err(e.into()), }; - OwningDirstateMap::new_v2(contents, data_size, metadata, uuid) + OwningDirstateMap::new_v2( + contents, data_size, metadata, uuid, identity, + ) } else { match self .hg_vfs() @@ -418,7 +437,7 @@ .io_not_found_as_none() { Ok(Some(data_mmap)) => OwningDirstateMap::new_v2( - data_mmap, data_size, metadata, uuid, + data_mmap, data_size, metadata, uuid, identity, ), Ok(None) => { // Race where the data file was deleted right after we @@ -534,12 +553,15 @@ // it’s unset let parents = self.dirstate_parents()?; let (packed_dirstate, old_uuid_to_remove) = if self.has_dirstate_v2() { - let (uuid, data_size) = self.get_dirstate_data_file_integrity()?; + let (identity, uuid, data_size) = + self.get_dirstate_data_file_integrity()?; + let identity_changed = identity != map.old_identity(); let uuid_changed = uuid.as_deref() != map.old_uuid(); let data_length_changed = data_size != map.old_data_size(); - if uuid_changed || data_length_changed { - // If uuid or length changed since last disk read, don't write. + if identity_changed || uuid_changed || data_length_changed { + // If any of identity, uuid or length have changed since + // last disk read, don't write. // This is fine because either we're in a command that doesn't // write anything too important (like `hg status`), or we're in // `hg add` and we're supposed to have taken the lock before @@ -636,6 +658,22 @@ (packed_dirstate, old_uuid) } else { + let identity = self.dirstate_identity()?; + if identity != map.old_identity() { + // If identity changed since last disk read, don't write. + // This is fine because either we're in a command that doesn't + // write anything too important (like `hg status`), or we're in + // `hg add` and we're supposed to have taken the lock before + // reading anyway. + // + // TODO complain loudly if we've changed anything important + // without taking the lock. + // (see `hg help config.format.use-dirstate-tracked-hint`) + log::debug!( + "dirstate has changed since last read, not updating." + ); + return Ok(()); + } (map.pack_v1(parents)?, None) };
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Tue Feb 28 17:58:15 2023 +0100 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed Mar 01 16:48:09 2023 +0100 @@ -49,9 +49,10 @@ @staticmethod def new_v1( on_disk: PyBytes, + identity: Option<u64>, ) -> PyResult<PyObject> { let on_disk = PyBytesDeref::new(py, on_disk); - let (map, parents) = OwningDirstateMap::new_v1(on_disk) + let (map, parents) = OwningDirstateMap::new_v1(on_disk, identity) .map_err(|e| dirstate_error(py, e))?; let map = Self::create_instance(py, map)?; let p1 = PyBytes::new(py, parents.p1.as_bytes()); @@ -67,6 +68,7 @@ data_size: usize, tree_metadata: PyBytes, uuid: PyBytes, + identity: Option<u64>, ) -> PyResult<PyObject> { let dirstate_error = |e: DirstateError| { PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e)) @@ -74,7 +76,11 @@ let on_disk = PyBytesDeref::new(py, on_disk); let uuid = uuid.data(py); let map = OwningDirstateMap::new_v2( - on_disk, data_size, tree_metadata.data(py), uuid.to_owned(), + on_disk, + data_size, + tree_metadata.data(py), + uuid.to_owned(), + identity, ).map_err(dirstate_error)?; let map = Self::create_instance(py, map)?; Ok(map.into_object())
--- a/tests/test-dirstate-status-write-race.t Tue Feb 28 17:58:15 2023 +0100 +++ b/tests/test-dirstate-status-write-race.t Wed Mar 01 16:48:09 2023 +0100 @@ -242,12 +242,9 @@ The file should in a "added" state $ hg status - A dir/n (no-rhg dirstate-v1 !) - A dir/n (no-dirstate-v1 !) - A dir/n (missing-correct-output rhg dirstate-v1 !) + A dir/n A dir/o R dir/nested/m - ? dir/n (known-bad-output rhg dirstate-v1 !) ? p ? q @@ -289,22 +286,6 @@ The parent must change and the status should be clean -# XXX rhg misbehaves here -#if rhg dirstate-v1 - $ hg summary - parent: 1:c349430a1631 - more files to have two commits - branch: default - commit: 1 added, 1 removed, 3 unknown (new branch head) - update: 1 new changesets (update) - phases: 3 draft - $ hg status - A dir/o - R dir/nested/m - ? dir/n - ? p - ? q -#else $ hg summary parent: 2:2e3b442a2fd4 tip created-during-status @@ -317,7 +298,6 @@ ? dir/n ? p ? q -#endif The status process should return a consistent result and not crash. @@ -416,9 +396,7 @@ the first update should be on disk $ hg debugstate --all | grep "g" - n 644 0 2000-01-01 00:10:00 g (known-bad-output rhg dirstate-v1 !) - n 644 0 2000-01-01 00:25:00 g (rhg no-dirstate-v1 !) - n 644 0 2000-01-01 00:25:00 g (no-rhg !) + n 644 0 2000-01-01 00:25:00 g The status process should return a consistent result and not crash.