changeset 52054:ea0467ed76aa

rust-dirstate-map: use a more precise identity This is closer to the behavior of what Python does. So far, we were checking only the inode, but this might not be good enough for the v1 case.
author Raphaël Gomès <rgomes@octobus.net>
date Thu, 03 Oct 2024 16:35:31 +0200
parents af54626bf358
children 0529e1a468dd
files mercurial/dirstatemap.py rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-core/src/dirstate_tree/owning.rs rust/hg-core/src/repo.rs rust/hg-cpython/src/dirstate.rs rust/hg-cpython/src/dirstate/dirstate_map.rs
diffstat 7 files changed, 161 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/dirstatemap.py	Mon Oct 14 14:14:21 2024 +0200
+++ b/mercurial/dirstatemap.py	Thu Oct 03 16:35:31 2024 +0200
@@ -5,6 +5,8 @@
 
 from __future__ import annotations
 
+import stat
+
 from typing import (
     Optional,
     TYPE_CHECKING,
@@ -678,12 +680,7 @@
                     parents = self._v1_map(e)
                 else:
                     parents = self.docket.parents
-                    inode = (
-                        self.identity.stat.st_ino
-                        if self.identity is not None
-                        and self.identity.stat is not None
-                        else None
-                    )
+                    identity = self._get_rust_identity()
                     testing.wait_on_cfg(
                         self._ui, b'dirstate.post-docket-read-file'
                     )
@@ -697,7 +694,7 @@
                             self.docket.data_size,
                             self.docket.tree_metadata,
                             self.docket.uuid,
-                            inode,
+                            identity,
                         )
                     parents = self.docket.parents
             else:
@@ -711,16 +708,31 @@
             self.get = self._map.get
             return self._map
 
-        def _v1_map(self, from_v2_exception=None):
+        def _get_rust_identity(self):
             self._set_identity()
-            inode = (
-                self.identity.stat.st_ino
-                if self.identity is not None and self.identity.stat is not None
-                else None
-            )
+            identity = None
+            if self.identity is not None and self.identity.stat is not None:
+                stat_info = self.identity.stat
+                identity = rustmod.DirstateIdentity(
+                    mode=stat_info.st_mode,
+                    dev=stat_info.st_dev,
+                    ino=stat_info.st_ino,
+                    nlink=stat_info.st_nlink,
+                    uid=stat_info.st_uid,
+                    gid=stat_info.st_gid,
+                    size=stat_info.st_size,
+                    mtime=stat_info[stat.ST_MTIME],
+                    mtime_nsec=0,
+                    ctime=stat_info[stat.ST_CTIME],
+                    ctime_nsec=0,
+                )
+            return identity
+
+        def _v1_map(self, from_v2_exception=None):
+            identity = self._get_rust_identity()
             try:
                 self._map, parents = rustmod.DirstateMap.new_v1(
-                    self._readdirstatefile(), inode
+                    self._readdirstatefile(), identity
                 )
             except OSError as e:
                 if from_v2_exception is not None:
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Oct 14 14:14:21 2024 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Thu Oct 03 16:35:31 2024 +0200
@@ -1,5 +1,7 @@
 use bytes_cast::BytesCast;
 use std::borrow::Cow;
+use std::fs::Metadata;
+use std::os::unix::fs::MetadataExt;
 use std::path::PathBuf;
 
 use super::on_disk;
@@ -45,6 +47,68 @@
     ForceAppend,
 }
 
+/// Used to detect out-of-process changes in the dirstate
+#[derive(Debug, Copy, Clone)]
+pub struct DirstateIdentity {
+    pub mode: u32,
+    pub dev: u64,
+    pub ino: u64,
+    pub nlink: u64,
+    pub uid: u32,
+    pub gid: u32,
+    pub size: u64,
+    pub mtime: i64,
+    pub mtime_nsec: i64,
+    pub ctime: i64,
+    pub ctime_nsec: i64,
+}
+
+impl From<Metadata> for DirstateIdentity {
+    fn from(value: Metadata) -> Self {
+        Self {
+            mode: value.mode(),
+            dev: value.dev(),
+            ino: value.ino(),
+            nlink: value.nlink(),
+            uid: value.uid(),
+            gid: value.gid(),
+            size: value.size(),
+            mtime: value.mtime(),
+            mtime_nsec: value.mtime_nsec(),
+            ctime: value.ctime(),
+            ctime_nsec: value.ctime_nsec(),
+        }
+    }
+}
+
+impl PartialEq for DirstateIdentity {
+    fn eq(&self, other: &Self) -> bool {
+        // Some platforms return 0 when they have no support for nanos.
+        // This shouldn't be a problem in practice because of how highly
+        // unlikely it is that we actually get exactly 0 nanos, and worst
+        // case scenario, we don't write out the dirstate in a non-wlocked
+        // situation like status.
+        let mtime_nanos_equal = (self.mtime_nsec == 0
+            || other.mtime_nsec == 0)
+            || self.mtime_nsec == other.mtime_nsec;
+        let ctime_nanos_equal = (self.ctime_nsec == 0
+            || other.ctime_nsec == 0)
+            || self.ctime_nsec == other.ctime_nsec;
+
+        self.mode == other.mode
+            && self.dev == other.dev
+            && self.ino == other.ino
+            && self.nlink == other.nlink
+            && self.uid == other.uid
+            && self.gid == other.gid
+            && self.size == other.size
+            && self.mtime == other.mtime
+            && mtime_nanos_equal
+            && self.ctime == other.ctime
+            && ctime_nanos_equal
+    }
+}
+
 #[derive(Debug)]
 pub struct DirstateMap<'on_disk> {
     /// Contents of the `.hg/dirstate` file
@@ -82,7 +146,7 @@
     /// check the file identity.
     ///
     /// TODO On non-Unix systems, something like hashing is a possibility?
-    pub(super) identity: Option<u64>,
+    pub(super) identity: Option<DirstateIdentity>,
 
     pub(super) dirstate_version: DirstateVersion,
 
@@ -484,7 +548,7 @@
         data_size: usize,
         metadata: &[u8],
         uuid: Vec<u8>,
-        identity: Option<u64>,
+        identity: Option<DirstateIdentity>,
     ) -> Result<Self, DirstateError> {
         if let Some(data) = on_disk.get(..data_size) {
             Ok(on_disk::read(data, metadata, uuid, identity)?)
@@ -496,7 +560,7 @@
     #[logging_timer::time("trace")]
     pub fn new_v1(
         on_disk: &'on_disk [u8],
-        identity: Option<u64>,
+        identity: Option<DirstateIdentity>,
     ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
         let mut map = Self::empty(on_disk);
         map.identity = identity;
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Mon Oct 14 14:14:21 2024 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu Oct 03 16:35:31 2024 +0200
@@ -21,6 +21,8 @@
 use std::borrow::Cow;
 use std::fmt::Write;
 
+use super::dirstate_map::DirstateIdentity;
+
 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
 /// This a redundant sanity check more than an actual "magic number" since
 /// `.hg/requires` already governs which format should be used.
@@ -288,7 +290,7 @@
     on_disk: &'on_disk [u8],
     metadata: &[u8],
     uuid: Vec<u8>,
-    identity: Option<u64>,
+    identity: Option<DirstateIdentity>,
 ) -> Result<DirstateMap<'on_disk>, DirstateV2ParseError> {
     if on_disk.is_empty() {
         let mut map = DirstateMap::empty(on_disk);
--- a/rust/hg-core/src/dirstate_tree/owning.rs	Mon Oct 14 14:14:21 2024 +0200
+++ b/rust/hg-core/src/dirstate_tree/owning.rs	Thu Oct 03 16:35:31 2024 +0200
@@ -1,6 +1,6 @@
 use crate::{DirstateError, DirstateParents};
 
-use super::dirstate_map::DirstateMap;
+use super::dirstate_map::{DirstateIdentity, DirstateMap};
 use self_cell::self_cell;
 use std::ops::Deref;
 
@@ -15,7 +15,10 @@
 );
 
 impl OwningDirstateMap {
-    pub fn new_empty<OnDisk>(on_disk: OnDisk, identity: Option<u64>) -> Self
+    pub fn new_empty<OnDisk>(
+        on_disk: OnDisk,
+        identity: Option<DirstateIdentity>,
+    ) -> Self
     where
         OnDisk: Deref<Target = [u8]> + Send + 'static,
     {
@@ -30,7 +33,7 @@
 
     pub fn new_v1<OnDisk>(
         on_disk: OnDisk,
-        identity: Option<u64>,
+        identity: Option<DirstateIdentity>,
     ) -> Result<(Self, DirstateParents), DirstateError>
     where
         OnDisk: Deref<Target = [u8]> + Send + 'static,
@@ -54,7 +57,7 @@
         data_size: usize,
         metadata: &[u8],
         uuid: Vec<u8>,
-        identity: Option<u64>,
+        identity: Option<DirstateIdentity>,
     ) -> Result<Self, DirstateError>
     where
         OnDisk: Deref<Target = [u8]> + Send + 'static,
@@ -85,7 +88,7 @@
         self.get_map().old_uuid.as_deref()
     }
 
-    pub fn old_identity(&self) -> Option<u64> {
+    pub fn old_identity(&self) -> Option<DirstateIdentity> {
         self.get_map().identity
     }
 
--- a/rust/hg-core/src/repo.rs	Mon Oct 14 14:14:21 2024 +0200
+++ b/rust/hg-core/src/repo.rs	Thu Oct 03 16:35:31 2024 +0200
@@ -1,7 +1,9 @@
 use crate::changelog::Changelog;
 use crate::config::{Config, ConfigError, ConfigParseError};
 use crate::dirstate::DirstateParents;
-use crate::dirstate_tree::dirstate_map::DirstateMapWriteMode;
+use crate::dirstate_tree::dirstate_map::{
+    DirstateIdentity, DirstateMapWriteMode,
+};
 use crate::dirstate_tree::on_disk::Docket as DirstateDocket;
 use crate::dirstate_tree::owning::OwningDirstateMap;
 use crate::errors::HgResultExt;
@@ -34,7 +36,8 @@
 
 const V2_MAX_READ_ATTEMPTS: usize = 5;
 
-type DirstateMapIdentity = (Option<u64>, Option<Vec<u8>>, usize);
+/// Docket file identity, data file uuid and the data size
+type DirstateV2Identity = (Option<DirstateIdentity>, Option<Vec<u8>>, usize);
 
 /// A repository on disk
 pub struct Repo {
@@ -311,13 +314,12 @@
             .unwrap_or_default())
     }
 
-    fn dirstate_identity(&self) -> Result<Option<u64>, HgError> {
-        use std::os::unix::fs::MetadataExt;
+    fn dirstate_identity(&self) -> Result<Option<DirstateIdentity>, HgError> {
         Ok(self
             .hg_vfs()
             .symlink_metadata("dirstate")
             .io_not_found_as_none()?
-            .map(|meta| meta.ino()))
+            .map(DirstateIdentity::from))
     }
 
     pub fn dirstate_parents(&self) -> Result<DirstateParents, HgError> {
@@ -355,10 +357,10 @@
     /// Returns the information read from the dirstate docket necessary to
     /// check if the data file has been updated/deleted by another process
     /// since we last read the dirstate.
-    /// Namely, the inode, data file uuid and the data size.
+    /// Namely the docket file identity, data file uuid and the data size.
     fn get_dirstate_data_file_integrity(
         &self,
-    ) -> Result<DirstateMapIdentity, HgError> {
+    ) -> Result<DirstateV2Identity, HgError> {
         assert!(
             self.use_dirstate_v2(),
             "accessing dirstate data file ID without dirstate-v2"
--- a/rust/hg-cpython/src/dirstate.rs	Mon Oct 14 14:14:21 2024 +0200
+++ b/rust/hg-cpython/src/dirstate.rs	Thu Oct 03 16:35:31 2024 +0200
@@ -16,12 +16,11 @@
 mod status;
 use self::item::DirstateItem;
 use crate::{
-    dirstate::{
-        dirs_multiset::Dirs, dirstate_map::DirstateMap, status::status_wrapper,
-    },
+    dirstate::{dirs_multiset::Dirs, status::status_wrapper},
     exceptions,
 };
 use cpython::{PyBytes, PyDict, PyList, PyModule, PyObject, PyResult, Python};
+use dirstate_map::{DirstateIdentity, DirstateMap};
 use hg::dirstate_tree::on_disk::V2_FORMAT_MARKER;
 
 /// Create the module, with `__package__` given from parent
@@ -42,6 +41,7 @@
     m.add_class::<Dirs>(py)?;
     m.add_class::<DirstateMap>(py)?;
     m.add_class::<DirstateItem>(py)?;
+    m.add_class::<DirstateIdentity>(py)?;
     m.add(py, "V2_FORMAT_MARKER", PyBytes::new(py, V2_FORMAT_MARKER))?;
     m.add(
         py,
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Mon Oct 14 14:14:21 2024 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Thu Oct 03 16:35:31 2024 +0200
@@ -16,7 +16,9 @@
 };
 use hg::{
     dirstate::{ParentFileData, TruncatedTimestamp},
-    dirstate_tree::dirstate_map::DirstateEntryReset,
+    dirstate_tree::dirstate_map::{
+        DirstateEntryReset, DirstateIdentity as CoreDirstateIdentity,
+    },
 };
 
 use crate::{
@@ -51,10 +53,13 @@
     @staticmethod
     def new_v1(
         on_disk: PyBytes,
-        identity: Option<u64>,
+        identity: Option<DirstateIdentity>,
     ) -> PyResult<PyObject> {
         let on_disk = PyBytesDeref::new(py, on_disk);
-        let (map, parents) = OwningDirstateMap::new_v1(on_disk, identity)
+        let (map, parents) = OwningDirstateMap::new_v1(
+            on_disk,
+            identity.map(|i| *i.inner(py))
+        )
             .map_err(|e| dirstate_error(py, e))?;
         let map = Self::create_instance(py, map)?;
         let p1 = PyBytes::new(py, parents.p1.as_bytes());
@@ -70,7 +75,7 @@
         data_size: usize,
         tree_metadata: PyBytes,
         uuid: PyBytes,
-        identity: Option<u64>,
+        identity: Option<DirstateIdentity>,
     ) -> PyResult<PyObject> {
         let dirstate_error = |e: DirstateError| {
             PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
@@ -82,7 +87,7 @@
             data_size,
             tree_metadata.data(py),
             uuid.to_owned(),
-            identity,
+            identity.map(|i| *i.inner(py)),
         ).map_err(dirstate_error)?;
         let map = Self::create_instance(py, map)?;
         Ok(map.into_object())
@@ -544,6 +549,41 @@
     Option<(PyBytes, PyObject)>
 );
 
+py_class!(pub class DirstateIdentity |py| {
+    data inner: CoreDirstateIdentity;
+
+    def __new__(
+        _cls,
+        mode: u32,
+        dev: u64,
+        ino: u64,
+        nlink: u64,
+        uid: u32,
+        gid: u32,
+        size: u64,
+        mtime: i64,
+        mtime_nsec: i64,
+        ctime: i64,
+        ctime_nsec: i64) -> PyResult<DirstateIdentity> {
+            Self::create_instance(
+                py,
+                CoreDirstateIdentity {
+                    mode,
+                    dev,
+                    ino,
+                    nlink,
+                    uid,
+                    gid,
+                    size,
+                    mtime,
+                    mtime_nsec,
+                    ctime,
+                    ctime_nsec
+                }
+            )
+    }
+});
+
 fn extract_node_id(py: Python, obj: &PyObject) -> PyResult<Node> {
     let bytes = obj.extract::<PyBytes>(py)?;
     match bytes.data(py).try_into() {