dirstate-v2: Truncate directory mtimes to 31 bits of seconds
… instead of 64 bits, while keeping the sub-second presision.
This brings the size of one timestamp from 12 bytes to 8 bytes.
31 bits is chosen instead of 32 because that’s already what happens for the
mtime of files and symlinks, because dirstate-v1 uses negative i32 values as
markers.
Later we’ll add sub-second precision for file/symlink mtimes, making their
dirstate-v2 representation the same as for directories.
Differential Revision: https://phab.mercurial-scm.org/D11633
--- a/mercurial/helptext/internals/dirstate-v2.txt Tue Oct 12 16:20:05 2021 +0200
+++ b/mercurial/helptext/internals/dirstate-v2.txt Tue Oct 12 16:38:13 2021 +0200
@@ -439,23 +439,24 @@
If an untracked node `HAS_MTIME` *unset*, this space is unused:
* Offset 31:
- 12 bytes set to zero
+ 12 unused bytes, set to zero
If an untracked node `HAS_MTIME` *set*,
what follows is the modification time of a directory
represented similarly to the C `timespec` struct:
* Offset 31:
+ 4 unused bytes, set to zero
+
+ * Offset 35:
The number of seconds elapsed since the Unix epoch,
- as a signed (two’s complement) 64-bit integer.
+ truncated to its lower 31 bits,
+ as a 32-bit integer.
* Offset 39:
- The number of nanoseconds elapsed since
- the instant specified by the previous field alone,
+ The sub-second number of nanoseconds elapsed since the Unix epoch,
as 32-bit integer.
Always greater than or equal to zero, and strictly less than a billion.
- Increasing this component makes the modification time
- go forward in time regardless of the sign of the seconds component.
The presence of a directory modification time means that at some point,
this path in the working directory was observed:
--- a/rust/hg-core/src/dirstate/entry.rs Tue Oct 12 16:20:05 2021 +0200
+++ b/rust/hg-core/src/dirstate/entry.rs Tue Oct 12 16:38:13 2021 +0200
@@ -1,3 +1,4 @@
+use crate::dirstate_tree::on_disk::DirstateV2ParseError;
use crate::errors::HgError;
use bitflags::bitflags;
use std::convert::TryFrom;
@@ -29,34 +30,76 @@
}
}
-#[derive(Copy, Clone, PartialEq)]
-pub struct Timestamp {
- seconds: i64,
-
- /// In `0 .. 1_000_000_000`.
- ///
- /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
+/// A Unix timestamp with nanoseconds precision
+#[derive(Copy, Clone)]
+pub struct TruncatedTimestamp {
+ truncated_seconds: u32,
+ /// Always in the `0 .. 1_000_000_000` range.
nanoseconds: u32,
}
-impl Timestamp {
- pub fn new(seconds: i64, nanoseconds: u32) -> Self {
+impl TruncatedTimestamp {
+ /// Constructs from a timestamp potentially outside of the supported range,
+ /// and truncate the seconds components to its lower 31 bits.
+ ///
+ /// Panics if the nanoseconds components is not in the expected range.
+ pub fn new_truncate(seconds: i64, nanoseconds: u32) -> Self {
+ assert!(nanoseconds < NSEC_PER_SEC);
Self {
- seconds,
+ truncated_seconds: seconds as u32 & RANGE_MASK_31BIT,
nanoseconds,
}
}
- pub fn seconds(&self) -> i64 {
- self.seconds
+ /// Construct from components. Returns an error if they are not in the
+ /// expcted range.
+ pub fn from_already_truncated(
+ truncated_seconds: u32,
+ nanoseconds: u32,
+ ) -> Result<Self, DirstateV2ParseError> {
+ if truncated_seconds & !RANGE_MASK_31BIT == 0
+ && nanoseconds < NSEC_PER_SEC
+ {
+ Ok(Self {
+ truncated_seconds,
+ nanoseconds,
+ })
+ } else {
+ Err(DirstateV2ParseError)
+ }
}
+ /// The lower 31 bits of the number of seconds since the epoch.
+ pub fn truncated_seconds(&self) -> u32 {
+ self.truncated_seconds
+ }
+
+ /// The sub-second component of this timestamp, in nanoseconds.
+ /// Always in the `0 .. 1_000_000_000` range.
+ ///
+ /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
pub fn nanoseconds(&self) -> u32 {
self.nanoseconds
}
+
+ /// Returns whether two timestamps are equal modulo 2**31 seconds.
+ ///
+ /// If this returns `true`, the original values converted from `SystemTime`
+ /// or given to `new_truncate` were very likely equal. A false positive is
+ /// possible if they were exactly a multiple of 2**31 seconds apart (around
+ /// 68 years). This is deemed very unlikely to happen by chance, especially
+ /// on filesystems that support sub-second precision.
+ ///
+ /// If someone is manipulating the modification times of some files to
+ /// intentionally make `hg status` return incorrect results, not truncating
+ /// wouldn’t help much since they can set exactly the expected timestamp.
+ pub fn very_likely_equal(&self, other: &Self) -> bool {
+ self.truncated_seconds == other.truncated_seconds
+ && self.nanoseconds == other.nanoseconds
+ }
}
-impl From<SystemTime> for Timestamp {
+impl From<SystemTime> for TruncatedTimestamp {
fn from(system_time: SystemTime) -> Self {
// On Unix, `SystemTime` is a wrapper for the `timespec` C struct:
// https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec
@@ -83,20 +126,17 @@
// For example if `system_time` was 4.3 seconds before
// the Unix epoch we get a Duration that represents
// `(-4, -0.3)` but we want `(-5, +0.7)`:
- const NSEC_PER_SEC: u32 = 1_000_000_000;
seconds = -1 - negative_secs;
nanoseconds = NSEC_PER_SEC - negative_nanos;
}
}
};
- Self {
- seconds,
- nanoseconds,
- }
+ Self::new_truncate(seconds, nanoseconds)
}
}
-pub const V1_RANGEMASK: i32 = 0x7FFFFFFF;
+const NSEC_PER_SEC: u32 = 1_000_000_000;
+const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF;
pub const MTIME_UNSET: i32 = -1;
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Oct 12 16:20:05 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Oct 12 16:38:13 2021 +0200
@@ -14,6 +14,7 @@
use crate::dirstate::parsers::Timestamp;
use crate::dirstate::CopyMapIter;
use crate::dirstate::StateMapIter;
+use crate::dirstate::TruncatedTimestamp;
use crate::dirstate::SIZE_FROM_OTHER_PARENT;
use crate::dirstate::SIZE_NON_NORMAL;
use crate::matchers::Matcher;
@@ -330,12 +331,12 @@
pub(super) fn cached_directory_mtime(
&self,
- ) -> Option<crate::dirstate::Timestamp> {
+ ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
match self {
- NodeRef::InMemory(_path, node) => match node.data {
+ NodeRef::InMemory(_path, node) => Ok(match node.data {
NodeData::CachedDirectory { mtime } => Some(mtime),
_ => None,
- },
+ }),
NodeRef::OnDisk(node) => node.cached_directory_mtime(),
}
}
@@ -376,7 +377,7 @@
pub(super) enum NodeData {
Entry(DirstateEntry),
- CachedDirectory { mtime: crate::dirstate::Timestamp },
+ CachedDirectory { mtime: TruncatedTimestamp },
None,
}
@@ -1177,8 +1178,8 @@
entry.debug_tuple()
} else if !all {
return Ok(None);
- } else if let Some(mtime) = node.cached_directory_mtime() {
- (b' ', 0, -1, mtime.seconds() as i32)
+ } else if let Some(mtime) = node.cached_directory_mtime()? {
+ (b' ', 0, -1, mtime.truncated_seconds() as i32)
} else {
(b' ', 0, -1, -1)
};
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Oct 12 16:20:05 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Tue Oct 12 16:38:13 2021 +0200
@@ -2,7 +2,7 @@
//!
//! See `mercurial/helptext/internals/dirstate-v2.txt`
-use crate::dirstate::Timestamp;
+use crate::dirstate::TruncatedTimestamp;
use crate::dirstate_tree::dirstate_map::{self, DirstateMap, NodeRef};
use crate::dirstate_tree::path_with_basename::WithBasename;
use crate::errors::HgError;
@@ -11,7 +11,7 @@
use crate::DirstateError;
use crate::DirstateParents;
use bitflags::bitflags;
-use bytes_cast::unaligned::{I32Be, I64Be, U16Be, U32Be};
+use bytes_cast::unaligned::{I32Be, U16Be, U32Be};
use bytes_cast::BytesCast;
use format_bytes::format_bytes;
use std::borrow::Cow;
@@ -122,11 +122,8 @@
#[derive(BytesCast, Copy, Clone)]
#[repr(C)]
struct PackedTimestamp {
- seconds: I64Be,
-
- /// In `0 .. 1_000_000_000`.
- ///
- /// This timestamp is after `(seconds, 0)` by this many nanoseconds.
+ _padding: U32Be,
+ truncated_seconds: U32Be,
nanoseconds: U32Be,
}
@@ -316,19 +313,23 @@
) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
if self.has_entry() {
Ok(dirstate_map::NodeData::Entry(self.assume_entry()))
- } else if let Some(mtime) = self.cached_directory_mtime() {
+ } else if let Some(mtime) = self.cached_directory_mtime()? {
Ok(dirstate_map::NodeData::CachedDirectory { mtime })
} else {
Ok(dirstate_map::NodeData::None)
}
}
- pub(super) fn cached_directory_mtime(&self) -> Option<Timestamp> {
- if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
- Some(self.data.as_timestamp())
- } else {
- None
- }
+ pub(super) fn cached_directory_mtime(
+ &self,
+ ) -> Result<Option<TruncatedTimestamp>, DirstateV2ParseError> {
+ Ok(
+ if self.flags.contains(Flags::HAS_MTIME) && !self.has_entry() {
+ Some(self.data.as_timestamp()?)
+ } else {
+ None
+ },
+ )
}
fn assume_entry(&self) -> DirstateEntry {
@@ -422,9 +423,10 @@
(flags, raw_entry)
}
- fn from_timestamp(timestamp: Timestamp) -> Self {
+ fn from_timestamp(timestamp: TruncatedTimestamp) -> Self {
let packed = PackedTimestamp {
- seconds: timestamp.seconds().into(),
+ _padding: 0.into(),
+ truncated_seconds: timestamp.truncated_seconds().into(),
nanoseconds: timestamp.nanoseconds().into(),
};
// Safety: both types implement the `ByteCast` trait, so we could
@@ -435,11 +437,14 @@
unsafe { std::mem::transmute::<PackedTimestamp, Entry>(packed) }
}
- fn as_timestamp(self) -> Timestamp {
+ fn as_timestamp(self) -> Result<TruncatedTimestamp, DirstateV2ParseError> {
// Safety: same as above in `from_timestamp`
let packed =
unsafe { std::mem::transmute::<Entry, PackedTimestamp>(self) };
- Timestamp::new(packed.seconds.get(), packed.nanoseconds.get())
+ TruncatedTimestamp::from_already_truncated(
+ packed.truncated_seconds.get(),
+ packed.nanoseconds.get(),
+ )
}
}
--- a/rust/hg-core/src/dirstate_tree/status.rs Tue Oct 12 16:20:05 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/status.rs Tue Oct 12 16:38:13 2021 +0200
@@ -1,4 +1,4 @@
-use crate::dirstate::entry::Timestamp;
+use crate::dirstate::entry::TruncatedTimestamp;
use crate::dirstate::status::IgnoreFnType;
use crate::dirstate_tree::dirstate_map::BorrowedPath;
use crate::dirstate_tree::dirstate_map::ChildNodesRef;
@@ -126,7 +126,8 @@
matcher: &'a (dyn Matcher + Sync),
ignore_fn: IgnoreFnType<'a>,
outcome: Mutex<DirstateStatus<'on_disk>>,
- new_cachable_directories: Mutex<Vec<(Cow<'on_disk, HgPath>, Timestamp)>>,
+ new_cachable_directories:
+ Mutex<Vec<(Cow<'on_disk, HgPath>, TruncatedTimestamp)>>,
outated_cached_directories: Mutex<Vec<Cow<'on_disk, HgPath>>>,
/// Whether ignore files like `.hgignore` have changed since the previous
@@ -165,7 +166,7 @@
dirstate_node: &NodeRef<'tree, 'on_disk>,
) -> Result<(), DirstateV2ParseError> {
if self.ignore_patterns_have_changed == Some(true)
- && dirstate_node.cached_directory_mtime().is_some()
+ && dirstate_node.cached_directory_mtime()?.is_some()
{
self.outated_cached_directories.lock().unwrap().push(
dirstate_node
@@ -182,7 +183,7 @@
fn can_skip_fs_readdir(
&self,
directory_metadata: Option<&std::fs::Metadata>,
- cached_directory_mtime: Option<Timestamp>,
+ cached_directory_mtime: Option<TruncatedTimestamp>,
) -> bool {
if !self.options.list_unknown && !self.options.list_ignored {
// All states that we care about listing have corresponding
@@ -199,8 +200,9 @@
// directory eligible for `read_dir` caching.
if let Some(meta) = directory_metadata {
if let Ok(current_mtime) = meta.modified() {
- let current_mtime = Timestamp::from(current_mtime);
- if current_mtime == cached_mtime {
+ let truncated =
+ TruncatedTimestamp::from(current_mtime);
+ if truncated.very_likely_equal(&cached_mtime) {
// The mtime of that directory has not changed
// since then, which means that the results of
// `read_dir` should also be unchanged.
@@ -222,7 +224,7 @@
directory_hg_path: &BorrowedPath<'tree, 'on_disk>,
directory_fs_path: &Path,
directory_metadata: Option<&std::fs::Metadata>,
- cached_directory_mtime: Option<Timestamp>,
+ cached_directory_mtime: Option<TruncatedTimestamp>,
is_at_repo_root: bool,
) -> Result<bool, DirstateV2ParseError> {
if self.can_skip_fs_readdir(directory_metadata, cached_directory_mtime)
@@ -363,7 +365,7 @@
hg_path,
fs_path,
Some(fs_metadata),
- dirstate_node.cached_directory_mtime(),
+ dirstate_node.cached_directory_mtime()?,
is_at_repo_root,
)?;
self.maybe_save_directory_mtime(
@@ -466,16 +468,22 @@
//
// We deem this scenario (unlike the previous one) to be
// unlikely enough in practice.
- let timestamp = directory_mtime.into();
- let cached = dirstate_node.cached_directory_mtime();
- if cached != Some(timestamp) {
+ let truncated = TruncatedTimestamp::from(directory_mtime);
+ let is_up_to_date = if let Some(cached) =
+ dirstate_node.cached_directory_mtime()?
+ {
+ cached.very_likely_equal(&truncated)
+ } else {
+ false
+ };
+ if !is_up_to_date {
let hg_path = dirstate_node
.full_path_borrowed(self.dmap.on_disk)?
.detach_from_tree();
self.new_cachable_directories
.lock()
.unwrap()
- .push((hg_path, timestamp))
+ .push((hg_path, truncated))
}
}
}