Mercurial > hg-stable
changeset 48538:4afb9627dc77
dirstate-v2: Apply SECOND_AMBIGUOUS to directory mtimes too
This would only be relevant in contrived scenarios such as a dirstate file
being written with a libc that supports sub-second precision in mtimes,
then transfered (at the filesystem level, not `hg clone`) to another
system where libc *doesn’t* have sub-second precision and truncates the stored
mtime of a directory to integer seconds.
Differential Revision: https://phab.mercurial-scm.org/D11939
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Fri, 17 Dec 2021 14:15:08 +0100 |
parents | c5d6c874766a |
children | 23ce9e7bf1e5 |
files | rust/hg-core/src/dirstate/status.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-core/src/dirstate_tree/status.rs rust/rhg/src/commands/status.rs |
diffstat | 4 files changed, 103 insertions(+), 82 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate/status.rs Wed Dec 15 15:28:41 2021 +0100 +++ b/rust/hg-core/src/dirstate/status.rs Fri Dec 17 14:15:08 2021 +0100 @@ -9,8 +9,8 @@ //! It is currently missing a lot of functionality compared to the Python one //! and will only be triggered in narrow cases. +use crate::dirstate::entry::TruncatedTimestamp; use crate::dirstate_tree::on_disk::DirstateV2ParseError; - use crate::{ utils::hg_path::{HgPath, HgPathError}, PatternError, @@ -77,7 +77,7 @@ pub struct DirstateStatus<'a> { /// The current time at the start of the `status()` algorithm, as measured /// and possibly truncated by the filesystem. - pub filesystem_time_at_status_start: Option<std::time::SystemTime>, + pub filesystem_time_at_status_start: Option<TruncatedTimestamp>, /// Tracked files whose contents have changed since the parent revision pub modified: Vec<StatusPath<'a>>,
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Wed Dec 15 15:28:41 2021 +0100 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Fri Dec 17 14:15:08 2021 +0100 @@ -382,7 +382,7 @@ && self.flags().contains(Flags::HAS_MTIME) && self.flags().contains(Flags::ALL_UNKNOWN_RECORDED) { - Ok(Some(self.mtime.try_into()?)) + Ok(Some(self.mtime()?)) } else { Ok(None) } @@ -402,6 +402,14 @@ file_type | permisions } + fn mtime(&self) -> Result<TruncatedTimestamp, DirstateV2ParseError> { + let mut m: TruncatedTimestamp = self.mtime.try_into()?; + if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) { + m.second_ambiguous = true; + } + Ok(m) + } + fn assume_entry(&self) -> Result<DirstateEntry, DirstateV2ParseError> { // TODO: convert through raw bits instead? let wdir_tracked = self.flags().contains(Flags::WDIR_TRACKED); @@ -418,11 +426,7 @@ && !self.flags().contains(Flags::DIRECTORY) && !self.flags().contains(Flags::EXPECTED_STATE_IS_MODIFIED) { - let mut m: TruncatedTimestamp = self.mtime.try_into()?; - if self.flags().contains(Flags::MTIME_SECOND_AMBIGUOUS) { - m.second_ambiguous = true; - } - Some(m) + Some(self.mtime()?) } else { None }; @@ -681,7 +685,7 @@ dirstate_map::NodeData::Entry(entry) => { Node::from_dirstate_entry(entry) } - dirstate_map::NodeData::CachedDirectory { mtime } => ( + dirstate_map::NodeData::CachedDirectory { mtime } => { // we currently never set a mtime if unknown file // are present. // So if we have a mtime for a directory, we know @@ -692,12 +696,14 @@ // We never set ALL_IGNORED_RECORDED since we // don't track that case // currently. - Flags::DIRECTORY + let mut flags = Flags::DIRECTORY | Flags::HAS_MTIME - | Flags::ALL_UNKNOWN_RECORDED, - 0.into(), - (*mtime).into(), - ), + | Flags::ALL_UNKNOWN_RECORDED; + if mtime.second_ambiguous { + flags.insert(Flags::MTIME_SECOND_AMBIGUOUS) + } + (flags, 0.into(), (*mtime).into()) + } dirstate_map::NodeData::None => ( Flags::DIRECTORY, 0.into(),
--- a/rust/hg-core/src/dirstate_tree/status.rs Wed Dec 15 15:28:41 2021 +0100 +++ b/rust/hg-core/src/dirstate_tree/status.rs Fri Dec 17 14:15:08 2021 +0100 @@ -63,7 +63,8 @@ (Box::new(|&_| true), vec![], None) }; - let filesystem_time_at_status_start = filesystem_now(&root_dir).ok(); + let filesystem_time_at_status_start = + filesystem_now(&root_dir).ok().map(TruncatedTimestamp::from); let outcome = DirstateStatus { filesystem_time_at_status_start, ..Default::default() @@ -145,7 +146,7 @@ /// The current time at the start of the `status()` algorithm, as measured /// and possibly truncated by the filesystem. - filesystem_time_at_status_start: Option<SystemTime>, + filesystem_time_at_status_start: Option<TruncatedTimestamp>, } enum Outcome { @@ -472,71 +473,86 @@ directory_metadata: &std::fs::Metadata, dirstate_node: NodeRef<'tree, 'on_disk>, ) -> Result<(), DirstateV2ParseError> { - if children_all_have_dirstate_node_or_are_ignored { - // All filesystem directory entries from `read_dir` have a - // corresponding node in the dirstate, so we can reconstitute the - // names of those entries without calling `read_dir` again. - if let (Some(status_start), Ok(directory_mtime)) = ( - &self.filesystem_time_at_status_start, - directory_metadata.modified(), + if !children_all_have_dirstate_node_or_are_ignored { + return Ok(()); + } + // All filesystem directory entries from `read_dir` have a + // corresponding node in the dirstate, so we can reconstitute the + // names of those entries without calling `read_dir` again. + + // TODO: use let-else here and below when available: + // https://github.com/rust-lang/rust/issues/87335 + let status_start = if let Some(status_start) = + &self.filesystem_time_at_status_start + { + status_start + } else { + return Ok(()); + }; + + // Although the Rust standard library’s `SystemTime` type + // has nanosecond precision, the times reported for a + // directory’s (or file’s) modified time may have lower + // resolution based on the filesystem (for example ext3 + // only stores integer seconds), kernel (see + // https://stackoverflow.com/a/14393315/1162888), etc. + let directory_mtime = if let Ok(option) = + TruncatedTimestamp::for_reliable_mtime_of( + directory_metadata, + status_start, ) { - // Although the Rust standard library’s `SystemTime` type - // has nanosecond precision, the times reported for a - // directory’s (or file’s) modified time may have lower - // resolution based on the filesystem (for example ext3 - // only stores integer seconds), kernel (see - // https://stackoverflow.com/a/14393315/1162888), etc. - if &directory_mtime >= status_start { - // The directory was modified too recently, don’t cache its - // `read_dir` results. - // - // A timeline like this is possible: - // - // 1. A change to this directory (direct child was - // added or removed) cause its mtime to be set - // (possibly truncated) to `directory_mtime` - // 2. This `status` algorithm calls `read_dir` - // 3. An other change is made to the same directory is - // made so that calling `read_dir` agin would give - // different results, but soon enough after 1. that - // the mtime stays the same - // - // On a system where the time resolution poor, this - // scenario is not unlikely if all three steps are caused - // by the same script. - } else { - // We’ve observed (through `status_start`) that time has - // “progressed” since `directory_mtime`, so any further - // change to this directory is extremely likely to cause a - // different mtime. - // - // Having the same mtime again is not entirely impossible - // since the system clock is not monotonous. It could jump - // backward to some point before `directory_mtime`, then a - // directory change could potentially happen during exactly - // the wrong tick. - // - // We deem this scenario (unlike the previous one) to be - // unlikely enough in practice. - let truncated = TruncatedTimestamp::from(directory_mtime); - let is_up_to_date = if let Some(cached) = - dirstate_node.cached_directory_mtime()? - { - cached.likely_equal(truncated) - } else { - false - }; - if !is_up_to_date { - let hg_path = dirstate_node - .full_path_borrowed(self.dmap.on_disk)? - .detach_from_tree(); - self.new_cachable_directories - .lock() - .unwrap() - .push((hg_path, truncated)) - } - } + if let Some(directory_mtime) = option { + directory_mtime + } else { + // The directory was modified too recently, + // don’t cache its `read_dir` results. + // + // 1. A change to this directory (direct child was + // added or removed) cause its mtime to be set + // (possibly truncated) to `directory_mtime` + // 2. This `status` algorithm calls `read_dir` + // 3. An other change is made to the same directory is + // made so that calling `read_dir` agin would give + // different results, but soon enough after 1. that + // the mtime stays the same + // + // On a system where the time resolution poor, this + // scenario is not unlikely if all three steps are caused + // by the same script. + return Ok(()); } + } else { + // OS/libc does not support mtime? + return Ok(()); + }; + // We’ve observed (through `status_start`) that time has + // “progressed” since `directory_mtime`, so any further + // change to this directory is extremely likely to cause a + // different mtime. + // + // Having the same mtime again is not entirely impossible + // since the system clock is not monotonous. It could jump + // backward to some point before `directory_mtime`, then a + // directory change could potentially happen during exactly + // the wrong tick. + // + // We deem this scenario (unlike the previous one) to be + // unlikely enough in practice. + + let is_up_to_date = + if let Some(cached) = dirstate_node.cached_directory_mtime()? { + cached.likely_equal(directory_mtime) + } else { + false + }; + if !is_up_to_date { + let hg_path = dirstate_node + .full_path_borrowed(self.dmap.on_disk)? + .detach_from_tree(); + self.new_cachable_directories + .lock() + .unwrap() + .push((hg_path, directory_mtime)) } Ok(()) }
--- a/rust/rhg/src/commands/status.rs Wed Dec 15 15:28:41 2021 +0100 +++ b/rust/rhg/src/commands/status.rs Fri Dec 17 14:15:08 2021 +0100 @@ -315,9 +315,8 @@ } let mut dirstate_write_needed = ds_status.dirty; - let filesystem_time_at_status_start = ds_status - .filesystem_time_at_status_start - .map(TruncatedTimestamp::from); + let filesystem_time_at_status_start = + ds_status.filesystem_time_at_status_start; if (fixup.is_empty() || filesystem_time_at_status_start.is_none()) && !dirstate_write_needed