changeset 48501:4afb9627dc77

dirstate-v2: Apply SECOND_AMBIGUOUS to directory mtimes too This would only be relevant in contrived scenarios such as a dirstate file being written with a libc that supports sub-second precision in mtimes, then transfered (at the filesystem level, not `hg clone`) to another system where libc *doesn’t* have sub-second precision and truncates the stored mtime of a directory to integer seconds. Differential Revision: https://phab.mercurial-scm.org/D11939
author Simon Sapin <simon.sapin@octobus.net>
date Fri, 17 Dec 2021 14:15:08 +0100
parents c5d6c874766a
children 23ce9e7bf1e5
files rust/hg-core/src/dirstate/status.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-core/src/dirstate_tree/status.rs rust/rhg/src/commands/status.rs
diffstat 4 files changed, 103 insertions(+), 82 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs	Wed Dec 15 15:28:41 2021 +0100
+++ b/rust/hg-core/src/dirstate/status.rs	Fri Dec 17 14:15:08 2021 +0100
@@ -9,8 +9,8 @@
 //! It is currently missing a lot of functionality compared to the Python one
 //! and will only be triggered in narrow cases.
 
+use crate::dirstate::entry::TruncatedTimestamp;
 use crate::dirstate_tree::on_disk::DirstateV2ParseError;
-
 use crate::{
     utils::hg_path::{HgPath, HgPathError},
     PatternError,
@@ -77,7 +77,7 @@
 pub struct DirstateStatus<'a> {
     /// The current time at the start of the `status()` algorithm, as measured
     /// and possibly truncated by the filesystem.
-    pub filesystem_time_at_status_start: Option<std::time::SystemTime>,
+    pub filesystem_time_at_status_start: Option<TruncatedTimestamp>,
 
     /// Tracked files whose contents have changed since the parent revision
     pub modified: Vec<StatusPath<'a>>,
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Wed Dec 15 15:28:41 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Fri Dec 17 14:15:08 2021 +0100
@@ -382,7 +382,7 @@
             && self.flags().contains(Flags::HAS_MTIME)
             && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED)
         {
-            Ok(Some(self.mtime.try_into()?))
+            Ok(Some(self.mtime()?))
         } else {
             Ok(None)
         }
@@ -402,6 +402,14 @@
         file_type | permisions
     }
 
+    fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
+        let mut m: TruncatedTimestamp = self.mtime.try_into()?;
+        if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
+            m.second_ambiguous = true;
+        }
+        Ok(m)
+    }
+
     fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> {
         // TODO: convert through raw bits instead?
         let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED);
@@ -418,11 +426,7 @@
             && !self.flags().contains(Flags::DIRECTORY)
             && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED)
         {
-            let mut m: TruncatedTimestamp = self.mtime.try_into()?;
-            if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) {
-                m.second_ambiguous = true;
-            }
-            Some(m)
+            Some(self.mtime()?)
         } else {
             None
         };
@@ -681,7 +685,7 @@
                         dirstate_map::NodeData::Entry(entry) => {
                             Node::from_dirstate_entry(entry)
                         }
-                        dirstate_map::NodeData::CachedDirectory { mtime } => (
+                        dirstate_map::NodeData::CachedDirectory { mtime } => {
                             // we currently never set a mtime if unknown file
                             // are present.
                             // So if we have a mtime for a directory, we know
@@ -692,12 +696,14 @@
                             // We never set ALL_IGNORED_RECORDED since we
                             // don't track that case
                             // currently.
-                            Flags::DIRECTORY
+                            let mut flags = Flags::DIRECTORY
                                 | Flags::HAS_MTIME
-                                | Flags::ALL_UNKNOWN_RECORDED,
-                            0.into(),
-                            (*mtime).into(),
-                        ),
+                                | Flags::ALL_UNKNOWN_RECORDED;
+                            if mtime.second_ambiguous {
+                                flags.insert(Flags::MTIME_SECOND_AMBIGUOUS)
+                            }
+                            (flags, 0.into(), (*mtime).into())
+                        }
                         dirstate_map::NodeData::None => (
                             Flags::DIRECTORY,
                             0.into(),
--- a/rust/hg-core/src/dirstate_tree/status.rs	Wed Dec 15 15:28:41 2021 +0100
+++ b/rust/hg-core/src/dirstate_tree/status.rs	Fri Dec 17 14:15:08 2021 +0100
@@ -63,7 +63,8 @@
             (Box::new(|&_| true), vec![], None)
         };
 
-    let filesystem_time_at_status_start = filesystem_now(&root_dir).ok();
+    let filesystem_time_at_status_start =
+        filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from);
     let outcome = DirstateStatus {
         filesystem_time_at_status_start,
         ..Default::default()
@@ -145,7 +146,7 @@
 
     /// The current time at the start of the `status()` algorithm, as measured
     /// and possibly truncated by the filesystem.
-    filesystem_time_at_status_start: Option<SystemTime>,
+    filesystem_time_at_status_start: Option<TruncatedTimestamp>,
 }
 
 enum Outcome {
@@ -472,71 +473,86 @@
         directory_metadata: &std::fs::Metadata,
         dirstate_node: NodeRef<'tree, 'on_disk>,
     ) -> Result<(), DirstateV2ParseError> {
-        if children_all_have_dirstate_node_or_are_ignored {
-            // All filesystem directory entries from `read_dir` have a
-            // corresponding node in the dirstate, so we can reconstitute the
-            // names of those entries without calling `read_dir` again.
-            if let (Some(status_start), Ok(directory_mtime)) = (
-                &self.filesystem_time_at_status_start,
-                directory_metadata.modified(),
+        if !children_all_have_dirstate_node_or_are_ignored {
+            return Ok(());
+        }
+        // All filesystem directory entries from `read_dir` have a
+        // corresponding node in the dirstate, so we can reconstitute the
+        // names of those entries without calling `read_dir` again.
+
+        // TODO: use let-else here and below when available:
+        // https://github.com/rust-lang/rust/issues/87335
+        let status_start = if let Some(status_start) =
+            &self.filesystem_time_at_status_start
+        {
+            status_start
+        } else {
+            return Ok(());
+        };
+
+        // Although the Rust standard library’s `SystemTime` type
+        // has nanosecond precision, the times reported for a
+        // directory’s (or file’s) modified time may have lower
+        // resolution based on the filesystem (for example ext3
+        // only stores integer seconds), kernel (see
+        // https://stackoverflow.com/a/14393315/1162888), etc.
+        let directory_mtime = if let Ok(option) =
+            TruncatedTimestamp::for_reliable_mtime_of(
+                directory_metadata,
+                status_start,
             ) {
-                // Although the Rust standard library’s `SystemTime` type
-                // has nanosecond precision, the times reported for a
-                // directory’s (or file’s) modified time may have lower
-                // resolution based on the filesystem (for example ext3
-                // only stores integer seconds), kernel (see
-                // https://stackoverflow.com/a/14393315/1162888), etc.
-                if &directory_mtime >= status_start {
-                    // The directory was modified too recently, don’t cache its
-                    // `read_dir` results.
-                    //
-                    // A timeline like this is possible:
-                    //
-                    // 1. A change to this directory (direct child was
-                    //    added or removed) cause its mtime to be set
-                    //    (possibly truncated) to `directory_mtime`
-                    // 2. This `status` algorithm calls `read_dir`
-                    // 3. An other change is made to the same directory is
-                    //    made so that calling `read_dir` agin would give
-                    //    different results, but soon enough after 1. that
-                    //    the mtime stays the same
-                    //
-                    // On a system where the time resolution poor, this
-                    // scenario is not unlikely if all three steps are caused
-                    // by the same script.
-                } else {
-                    // We’ve observed (through `status_start`) that time has
-                    // “progressed” since `directory_mtime`, so any further
-                    // change to this directory is extremely likely to cause a
-                    // different mtime.
-                    //
-                    // Having the same mtime again is not entirely impossible
-                    // since the system clock is not monotonous. It could jump
-                    // backward to some point before `directory_mtime`, then a
-                    // directory change could potentially happen during exactly
-                    // the wrong tick.
-                    //
-                    // We deem this scenario (unlike the previous one) to be
-                    // unlikely enough in practice.
-                    let truncated = TruncatedTimestamp::from(directory_mtime);
-                    let is_up_to_date = if let Some(cached) =
-                        dirstate_node.cached_directory_mtime()?
-                    {
-                        cached.likely_equal(truncated)
-                    } else {
-                        false
-                    };
-                    if !is_up_to_date {
-                        let hg_path = dirstate_node
-                            .full_path_borrowed(self.dmap.on_disk)?
-                            .detach_from_tree();
-                        self.new_cachable_directories
-                            .lock()
-                            .unwrap()
-                            .push((hg_path, truncated))
-                    }
-                }
+            if let Some(directory_mtime) = option {
+                directory_mtime
+            } else {
+                // The directory was modified too recently,
+                // don’t cache its `read_dir` results.
+                //
+                // 1. A change to this directory (direct child was
+                //    added or removed) cause its mtime to be set
+                //    (possibly truncated) to `directory_mtime`
+                // 2. This `status` algorithm calls `read_dir`
+                // 3. An other change is made to the same directory is
+                //    made so that calling `read_dir` agin would give
+                //    different results, but soon enough after 1. that
+                //    the mtime stays the same
+                //
+                // On a system where the time resolution poor, this
+                // scenario is not unlikely if all three steps are caused
+                // by the same script.
+                return Ok(());
             }
+        } else {
+            // OS/libc does not support mtime?
+            return Ok(());
+        };
+        // We’ve observed (through `status_start`) that time has
+        // “progressed” since `directory_mtime`, so any further
+        // change to this directory is extremely likely to cause a
+        // different mtime.
+        //
+        // Having the same mtime again is not entirely impossible
+        // since the system clock is not monotonous. It could jump
+        // backward to some point before `directory_mtime`, then a
+        // directory change could potentially happen during exactly
+        // the wrong tick.
+        //
+        // We deem this scenario (unlike the previous one) to be
+        // unlikely enough in practice.
+
+        let is_up_to_date =
+            if let Some(cached) = dirstate_node.cached_directory_mtime()? {
+                cached.likely_equal(directory_mtime)
+            } else {
+                false
+            };
+        if !is_up_to_date {
+            let hg_path = dirstate_node
+                .full_path_borrowed(self.dmap.on_disk)?
+                .detach_from_tree();
+            self.new_cachable_directories
+                .lock()
+                .unwrap()
+                .push((hg_path, directory_mtime))
         }
         Ok(())
     }
--- a/rust/rhg/src/commands/status.rs	Wed Dec 15 15:28:41 2021 +0100
+++ b/rust/rhg/src/commands/status.rs	Fri Dec 17 14:15:08 2021 +0100
@@ -315,9 +315,8 @@
     }
 
     let mut dirstate_write_needed = ds_status.dirty;
-    let filesystem_time_at_status_start = ds_status
-        .filesystem_time_at_status_start
-        .map(TruncatedTimestamp::from);
+    let filesystem_time_at_status_start =
+        ds_status.filesystem_time_at_status_start;
 
     if (fixup.is_empty() || filesystem_time_at_status_start.is_none())
         && !dirstate_write_needed