Mercurial > hg
changeset 48193:320de901896a
dirstate-v2: Truncate directory mtimes to 31 bits of seconds
… instead of 64 bits, while keeping the sub-second precision.
This brings the size of one timestamp from 12 bytes to 8 bytes.
31 bits is chosen instead of 32 because that’s already what happens for the
mtime of files and symlinks, because dirstate-v1 uses negative i32 values as
markers.
Later we’ll add sub-second precision for file/symlink mtimes, making their
dirstate-v2 representation the same as for directories.
Differential Revision: https://phab.mercurial-scm.org/D11633
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Tue, 12 Oct 2021 16:38:13 +0200 |
parents | d2f760c2c91c |
children | 1000db4a71f1 |
files | mercurial/helptext/internals/dirstate-v2.txt rust/hg-core/src/dirstate/entry.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-core/src/dirstate_tree/status.rs |
diffstat | 5 files changed, 116 insertions(+), 61 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/helptext/internals/dirstate-v2.txt Tue Oct 12 16:20:05 2021 +0200 +++ b/mercurial/helptext/internals/dirstate-v2.txt Tue Oct 12 16:38:13 2021 +0200 @@ -439,23 +439,24 @@ If an untracked node `HAS_MTIME` *unset*, this space is unused: * Offset 31: - 12 bytes set to zero + 12 unused bytes, set to zero If an untracked node `HAS_MTIME` *set*, what follows is the modification time of a directory represented similarly to the C `timespec` struct: * Offset 31: + 4 unused bytes, set to zero + + * Offset 35: The number of seconds elapsed since the Unix epoch, - as a signed (two’s complement) 64-bit integer. + truncated to its lower 31 bits, + as a 32-bit integer. * Offset 39: - The number of nanoseconds elapsed since - the instant specified by the previous field alone, + The sub-second number of nanoseconds elapsed since the Unix epoch, as 32-bit integer. Always greater than or equal to zero, and strictly less than a billion. - Increasing this component makes the modification time - go forward in time regardless of the sign of the seconds component. The presence of a directory modification time means that at some point, this path in the working directory was observed:
--- a/rust/hg-core/src/dirstate/entry.rs Tue Oct 12 16:20:05 2021 +0200 +++ b/rust/hg-core/src/dirstate/entry.rs Tue Oct 12 16:38:13 2021 +0200 @@ -1,3 +1,4 @@ +use crate::dirstate_tree::on_disk::DirstateV2ParseError; use crate::errors::HgError; use bitflags::bitflags; use std::convert::TryFrom; @@ -29,34 +30,76 @@ } } -#[derive(Copy, Clone, PartialEq)] -pub struct Timestamp { - seconds: i64, - - /// In `0 .. 1_000_000_000`. - /// - /// This timestamp is after `(seconds, 0)` by this many nanoseconds. +/// A Unix timestamp with nanoseconds precision +#[derive(Copy, Clone)] +pub struct TruncatedTimestamp { + truncated_seconds: u32, + /// Always in the `0 .. 1_000_000_000` range. nanoseconds: u32, } -impl Timestamp { - pub fn new(seconds: i64, nanoseconds: u32) -> Self { +impl TruncatedTimestamp { + /// Constructs from a timestamp potentially outside of the supported range, + /// and truncate the seconds components to its lower 31 bits. + /// + /// Panics if the nanoseconds components is not in the expected range. + pub fn new_truncate(seconds: i64, nanoseconds: u32) -> Self { + assert!(nanoseconds < NSEC_PER_SEC); Self { - seconds, + truncated_seconds: seconds as u32 & RANGE_MASK_31BIT, nanoseconds, } } - pub fn seconds(&self) -> i64 { - self.seconds + /// Construct from components. Returns an error if they are not in the + /// expcted range. + pub fn from_already_truncated( + truncated_seconds: u32, + nanoseconds: u32, + ) -> Result<Self, DirstateV2ParseError> { + if truncated_seconds & !RANGE_MASK_31BIT == 0 + && nanoseconds < NSEC_PER_SEC + { + Ok(Self { + truncated_seconds, + nanoseconds, + }) + } else { + Err(DirstateV2ParseError) + } } + /// The lower 31 bits of the number of seconds since the epoch. + pub fn truncated_seconds(&self) -> u32 { + self.truncated_seconds + } + + /// The sub-second component of this timestamp, in nanoseconds. + /// Always in the `0 .. 1_000_000_000` range. + /// + /// This timestamp is after `(seconds, 0)` by this many nanoseconds. 
pub fn nanoseconds(&self) -> u32 { self.nanoseconds } + + /// Returns whether two timestamps are equal modulo 2**31 seconds. + /// + /// If this returns `true`, the original values converted from `SystemTime` + /// or given to `new_truncate` were very likely equal. A false positive is + /// possible if they were exactly a multiple of 2**31 seconds apart (around + /// 68 years). This is deemed very unlikely to happen by chance, especially + /// on filesystems that support sub-second precision. + /// + /// If someone is manipulating the modification times of some files to + /// intentionally make `hg status` return incorrect results, not truncating + /// wouldn’t help much since they can set exactly the expected timestamp. + pub fn very_likely_equal(&self, other: &Self) -> bool { + self.truncated_seconds == other.truncated_seconds + && self.nanoseconds == other.nanoseconds + } } -impl From<SystemTime> for Timestamp { +impl From<SystemTime> for TruncatedTimestamp { fn from(system_time: SystemTime) -> Self { // On Unix, `SystemTime` is a wrapper for the `timespec` C struct: // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec @@ -83,20 +126,17 @@ // For example if `system_time` was 4.3 seconds before // the Unix epoch we get a Duration that represents // `(-4, -0.3)` but we want `(-5, +0.7)`: - const NSEC_PER_SEC: u32 = 1_000_000_000; seconds = -1 - negative_secs; nanoseconds = NSEC_PER_SEC - negative_nanos; } } }; - Self { - seconds, - nanoseconds, - } + Self::new_truncate(seconds, nanoseconds) } } -pub const V1_RANGEMASK: i32 = 0x7FFFFFFF; +const NSEC_PER_SEC: u32 = 1_000_000_000; +const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF; pub const MTIME_UNSET: i32 = -1;
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Oct 12 16:20:05 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Oct 12 16:38:13 2021 +0200 @@ -14,6 +14,7 @@ use crate::dirstate::parsers::Timestamp; use crate::dirstate::CopyMapIter; use crate::dirstate::StateMapIter; +use crate::dirstate::TruncatedTimestamp; use crate::dirstate::SIZE_FROM_OTHER_PARENT; use crate::dirstate::SIZE_NON_NORMAL; use crate::matchers::Matcher; @@ -330,12 +331,12 @@ pub(super) fn cached_directory_mtime( &self, - ) -> Option<crate::dirstate::Timestamp> { + ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> { match self { - NodeRef::InMemory(_path, node) => match node.data { + NodeRef::InMemory(_path, node) => Ok(match node.data { NodeData::CachedDirectory { mtime } => Some(mtime), _ => None, - }, + }), NodeRef::OnDisk(node) => node.cached_directory_mtime(), } } @@ -376,7 +377,7 @@ pub(super) enum NodeData { Entry(DirstateEntry), - CachedDirectory { mtime: crate::dirstate::Timestamp }, + CachedDirectory { mtime: TruncatedTimestamp }, None, } @@ -1177,8 +1178,8 @@ entry.debug_tuple() } else if !all { return Ok(None); - } else if let Some(mtime) = node.cached_directory_mtime() { - (b' ', 0, -1, mtime.seconds() as i32) + } else if let Some(mtime) = node.cached_directory_mtime()? { + (b' ', 0, -1, mtime.truncated_seconds() as i32) } else { (b' ', 0, -1, -1) };
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Oct 12 16:20:05 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Oct 12 16:38:13 2021 +0200 @@ -2,7 +2,7 @@ //! //! See `mercurial/helptext/internals/dirstate-v2.txt` -use crate::dirstate::Timestamp; +use crate::dirstate::TruncatedTimestamp; use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef}; use crate::dirstate_tree::path_with_basename::WithBasename; use crate::errors::HgError; @@ -11,7 +11,7 @@ use crate::DirstateError; use crate::DirstateParents; use bitflags::bitflags; -use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be}; +use bytes_cast::unaligned::{I32Be, U16Be, U32Be}; use bytes_cast::BytesCast; use format_bytes::format_bytes; use std::borrow::Cow; @@ -122,11 +122,8 @@ #[derive(BytesCast, Copy, Clone)] #[repr(C)] struct PackedTimestamp { - seconds: I64Be, - - /// In `0 .. 1_000_000_000`. - /// - /// This timestamp is after `(seconds, 0)` by this many nanoseconds. + _padding: U32Be, + truncated_seconds: U32Be, nanoseconds: U32Be, } @@ -316,19 +313,23 @@ ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> { if self.has_entry() { Ok(dirstate_map::NodeData::Entry(self.assume_entry())) - } else if let Some(mtime) = self.cached_directory_mtime() { + } else if let Some(mtime) = self.cached_directory_mtime()? { Ok(dirstate_map::NodeData::CachedDirectory { mtime }) } else { Ok(dirstate_map::NodeData::None) } } - pub(super) fn cached_directory_mtime(&self) -> Option<Timestamp> { - if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() { - Some(self.data.as_timestamp()) - } else { - None - } + pub(super) fn cached_directory_mtime( + &self, + ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> { + Ok( + if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() { + Some(self.data.as_timestamp()?) 
+ } else { + None + }, + ) } fn assume_entry(&self) -> DirstateEntry { @@ -422,9 +423,10 @@ (flags, raw_entry) } - fn from_timestamp(timestamp: Timestamp) -> Self { + fn from_timestamp(timestamp: TruncatedTimestamp) -> Self { let packed = PackedTimestamp { - seconds: timestamp.seconds().into(), + _padding: 0.into(), + truncated_seconds: timestamp.truncated_seconds().into(), nanoseconds: timestamp.nanoseconds().into(), }; // Safety: both types implement the `ByteCast` trait, so we could @@ -435,11 +437,14 @@ unsafe { std::mem::transmute::<PackedTimestamp, Entry>(packed) } } - fn as_timestamp(self) -> Timestamp { + fn as_timestamp(self) -> Result<TruncatedTimestamp, DirstateV2ParseError> { // Safety: same as above in `from_timestamp` let packed = unsafe { std::mem::transmute::<Entry, PackedTimestamp>(self) }; - Timestamp::new(packed.seconds.get(), packed.nanoseconds.get()) + TruncatedTimestamp::from_already_truncated( + packed.truncated_seconds.get(), + packed.nanoseconds.get(), + ) } }
--- a/rust/hg-core/src/dirstate_tree/status.rs Tue Oct 12 16:20:05 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/status.rs Tue Oct 12 16:38:13 2021 +0200 @@ -1,4 +1,4 @@ -use crate::dirstate::entry::Timestamp; +use crate::dirstate::entry::TruncatedTimestamp; use crate::dirstate::status::IgnoreFnType; use crate::dirstate_tree::dirstate_map::BorrowedPath; use crate::dirstate_tree::dirstate_map::ChildNodesRef; @@ -126,7 +126,8 @@ matcher: &'a (dyn Matcher + Sync), ignore_fn: IgnoreFnType<'a>, outcome: Mutex<DirstateStatus<'on_disk>>, - new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>, + new_cachable_directories: + Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>, outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>, /// Whether ignore files like `.hgignore` have changed since the previous @@ -165,7 +166,7 @@ dirstate_node: &NodeRef<'tree, 'on_disk>, ) -> Result<(), DirstateV2ParseError> { if self.ignore_patterns_have_changed == Some(true) - && dirstate_node.cached_directory_mtime().is_some() + && dirstate_node.cached_directory_mtime()?.is_some() { self.outated_cached_directories.lock().unwrap().push( dirstate_node @@ -182,7 +183,7 @@ fn can_skip_fs_readdir( &self, directory_metadata: Option<&std::fs::Metadata>, - cached_directory_mtime: Option<Timestamp>, + cached_directory_mtime: Option<TruncatedTimestamp>, ) -> bool { if !self.options.list_unknown && !self.options.list_ignored { // All states that we care about listing have corresponding @@ -199,8 +200,9 @@ // directory eligible for `read_dir` caching. if let Some(meta) = directory_metadata { if let Ok(current_mtime) = meta.modified() { - let current_mtime = Timestamp::from(current_mtime); - if current_mtime == cached_mtime { + let truncated = + TruncatedTimestamp::from(current_mtime); + if truncated.very_likely_equal(&cached_mtime) { // The mtime of that directory has not changed // since then, which means that the results of // `read_dir` should also be unchanged. 
@@ -222,7 +224,7 @@ directory_hg_path: &BorrowedPath<'tree, 'on_disk>, directory_fs_path: &Path, directory_metadata: Option<&std::fs::Metadata>, - cached_directory_mtime: Option<Timestamp>, + cached_directory_mtime: Option<TruncatedTimestamp>, is_at_repo_root: bool, ) -> Result<bool, DirstateV2ParseError> { if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime) @@ -363,7 +365,7 @@ hg_path, fs_path, Some(fs_metadata), - dirstate_node.cached_directory_mtime(), + dirstate_node.cached_directory_mtime()?, is_at_repo_root, )?; self.maybe_save_directory_mtime( @@ -466,16 +468,22 @@ // // We deem this scenario (unlike the previous one) to be // unlikely enough in practice. - let timestamp = directory_mtime.into(); - let cached = dirstate_node.cached_directory_mtime(); - if cached != Some(timestamp) { + let truncated = TruncatedTimestamp::from(directory_mtime); + let is_up_to_date = if let Some(cached) = + dirstate_node.cached_directory_mtime()? + { + cached.very_likely_equal(&truncated) + } else { + false + }; + if !is_up_to_date { let hg_path = dirstate_node .full_path_borrowed(self.dmap.on_disk)? .detach_from_tree(); self.new_cachable_directories .lock() .unwrap() - .push((hg_path, timestamp)) + .push((hg_path, truncated)) } } }