Mercurial > hg
view rust/hg-core/src/dirstate/entry.rs @ 52302:db065b33fa56
rust-dirstate: merge `dirstate_tree` module into `dirstate`
The historical reasonning for `dirstate_tree` existing in the first place is
that a new approach was needed for the tree-like dirstate and it was easier
to start somewhat fresh. Now that the former dirstate is (long) gone, we
can merge those two modules to avoid the confusion that even the module
creators sometimes get.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 04 Nov 2024 11:00:58 +0100 |
parents | 0529e1a468dd |
children |
line wrap: on
line source
use crate::dirstate::on_disk::DirstateV2ParseError; use crate::errors::HgError; use bitflags::bitflags; use std::fs; use std::io; use std::time::{SystemTime, UNIX_EPOCH}; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum EntryState { Normal, Added, Removed, Merged, } /// `size` and `mtime.seconds` are truncated to 31 bits. /// /// TODO: double-check status algorithm correctness for files /// larger than 2 GiB or modified after 2038. #[derive(Debug, Copy, Clone)] pub struct DirstateEntry { pub(crate) flags: Flags, mode_size: Option<(u32, u32)>, mtime: Option<TruncatedTimestamp>, } bitflags! { pub(crate) struct Flags: u8 { const WDIR_TRACKED = 1 << 0; const P1_TRACKED = 1 << 1; const P2_INFO = 1 << 2; const HAS_FALLBACK_EXEC = 1 << 3; const FALLBACK_EXEC = 1 << 4; const HAS_FALLBACK_SYMLINK = 1 << 5; const FALLBACK_SYMLINK = 1 << 6; } } /// A Unix timestamp with nanoseconds precision #[derive(Debug, Copy, Clone)] pub struct TruncatedTimestamp { truncated_seconds: u32, /// Always in the `0 .. 1_000_000_000` range. nanoseconds: u32, /// TODO this should be in DirstateEntry, but the current code needs /// refactoring to use DirstateEntry instead of TruncatedTimestamp for /// comparison. pub second_ambiguous: bool, } impl TruncatedTimestamp { /// Constructs from a timestamp potentially outside of the supported range, /// and truncate the seconds components to its lower 31 bits. /// /// Panics if the nanoseconds components is not in the expected range. pub fn new_truncate( seconds: i64, nanoseconds: u32, second_ambiguous: bool, ) -> Self { assert!(nanoseconds < NSEC_PER_SEC); Self { truncated_seconds: seconds as u32 & RANGE_MASK_31BIT, nanoseconds, second_ambiguous, } } /// Construct from components. Returns an error if they are not in the /// expcted range. pub fn from_already_truncated( truncated_seconds: u32, nanoseconds: u32, second_ambiguous: bool, ) -> Result<Self, DirstateV2ParseError> { if truncated_seconds & !RANGE_MASK_31BIT == 0 && nanoseconds < NSEC_PER_SEC { Ok(Self { truncated_seconds, nanoseconds, second_ambiguous, }) } else { Err(DirstateV2ParseError::new("when reading datetime")) } } /// Returns a `TruncatedTimestamp` for the modification time of `metadata`. /// /// Propagates errors from `std` on platforms where modification time /// is not available at all. pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> { #[cfg(unix)] { use std::os::unix::fs::MetadataExt; let seconds = metadata.mtime(); // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range let nanoseconds = metadata.mtime_nsec().try_into().unwrap(); Ok(Self::new_truncate(seconds, nanoseconds, false)) } #[cfg(not(unix))] { metadata.modified().map(Self::from) } } /// Like `for_mtime_of`, but may return `None` or a value with /// `second_ambiguous` set if the mtime is not "reliable". /// /// A modification time is reliable if it is older than `boundary` (or /// sufficiently in the future). /// /// Otherwise a concurrent modification might happens with the same mtime. pub fn for_reliable_mtime_of( metadata: &fs::Metadata, boundary: &Self, ) -> io::Result<Option<Self>> { Ok(Self::for_mtime_of(metadata)?.for_reliable_mtime_of_self(boundary)) } /// See [`Self::for_reliable_mtime_of`] pub fn for_reliable_mtime_of_self(&self, boundary: &Self) -> Option<Self> { let mut new = *self; // If the mtime of the ambiguous file is younger (or equal) to the // starting point of the `status` walk, we cannot garantee that // another, racy, write will not happen right after with the same mtime // and we cannot cache the information. // // However if the mtime is far away in the future, this is likely some // mismatch between the current clock and previous file system // operation. So mtime more than one days in the future are considered // fine. let reliable = if self.truncated_seconds == boundary.truncated_seconds { new.second_ambiguous = true; self.nanoseconds != 0 && boundary.nanoseconds != 0 && self.nanoseconds < boundary.nanoseconds } else { // `truncated_seconds` is less than 2**31, // so this does not overflow `u32`: let one_day_later = boundary.truncated_seconds + 24 * 3600; self.truncated_seconds < boundary.truncated_seconds || self.truncated_seconds > one_day_later }; if reliable { Some(new) } else { None } } /// The lower 31 bits of the number of seconds since the epoch. pub fn truncated_seconds(&self) -> u32 { self.truncated_seconds } /// The sub-second component of this timestamp, in nanoseconds. /// Always in the `0 .. 1_000_000_000` range. /// /// This timestamp is after `(seconds, 0)` by this many nanoseconds. pub fn nanoseconds(&self) -> u32 { self.nanoseconds } /// Returns whether two timestamps are equal modulo 2**31 seconds. /// /// If this returns `true`, the original values converted from `SystemTime` /// or given to `new_truncate` were very likely equal. A false positive is /// possible if they were exactly a multiple of 2**31 seconds apart (around /// 68 years). This is deemed very unlikely to happen by chance, especially /// on filesystems that support sub-second precision. /// /// If someone is manipulating the modification times of some files to /// intentionally make `hg status` return incorrect results, not truncating /// wouldn’t help much since they can set exactly the expected timestamp. /// /// Sub-second precision is ignored if it is zero in either value. /// Some APIs simply return zero when more precision is not available. /// When comparing values from different sources, if only one is truncated /// in that way, doing a simple comparison would cause many false /// negatives. pub fn likely_equal(self, other: Self) -> bool { if self.truncated_seconds != other.truncated_seconds { false } else if self.nanoseconds == 0 || other.nanoseconds == 0 { !self.second_ambiguous } else { self.nanoseconds == other.nanoseconds } } pub fn likely_equal_to_mtime_of( self, metadata: &fs::Metadata, ) -> io::Result<bool> { Ok(self.likely_equal(Self::for_mtime_of(metadata)?)) } } impl From<SystemTime> for TruncatedTimestamp { fn from(system_time: SystemTime) -> Self { // On Unix, `SystemTime` is a wrapper for the `timespec` C struct: // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec // We want to effectively access its fields, but the Rust standard // library does not expose them. The best we can do is: let seconds; let nanoseconds; match system_time.duration_since(UNIX_EPOCH) { Ok(duration) => { seconds = duration.as_secs() as i64; nanoseconds = duration.subsec_nanos(); } Err(error) => { // `system_time` is before `UNIX_EPOCH`. // We need to undo this algorithm: // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41 let negative = error.duration(); let negative_secs = negative.as_secs() as i64; let negative_nanos = negative.subsec_nanos(); if negative_nanos == 0 { seconds = -negative_secs; nanoseconds = 0; } else { // For example if `system_time` was 4.3 seconds before // the Unix epoch we get a Duration that represents // `(-4, -0.3)` but we want `(-5, +0.7)`: seconds = -1 - negative_secs; nanoseconds = NSEC_PER_SEC - negative_nanos; } } }; Self::new_truncate(seconds, nanoseconds, false) } } const NSEC_PER_SEC: u32 = 1_000_000_000; pub const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF; pub const MTIME_UNSET: i32 = -1; /// A `DirstateEntry` with a size of `-2` means that it was merged from the /// other parent. This allows revert to pick the right status back during a /// merge. pub const SIZE_FROM_OTHER_PARENT: i32 = -2; /// A special value used for internal representation of special case in /// dirstate v1 format. pub const SIZE_NON_NORMAL: i32 = -1; #[derive(Debug, Default, Copy, Clone)] pub struct DirstateV2Data { pub wc_tracked: bool, pub p1_tracked: bool, pub p2_info: bool, pub mode_size: Option<(u32, u32)>, pub mtime: Option<TruncatedTimestamp>, pub fallback_exec: Option<bool>, pub fallback_symlink: Option<bool>, } #[derive(Debug, Default, Copy, Clone)] pub struct ParentFileData { pub mode_size: Option<(u32, u32)>, pub mtime: Option<TruncatedTimestamp>, } impl DirstateEntry { pub fn from_v2_data(v2_data: DirstateV2Data) -> Self { let DirstateV2Data { wc_tracked, p1_tracked, p2_info, mode_size, mtime, fallback_exec, fallback_symlink, } = v2_data; if let Some((mode, size)) = mode_size { // TODO: return an error for out of range values? assert!(mode & !RANGE_MASK_31BIT == 0); assert!(size & !RANGE_MASK_31BIT == 0); } let mut flags = Flags::empty(); flags.set(Flags::WDIR_TRACKED, wc_tracked); flags.set(Flags::P1_TRACKED, p1_tracked); flags.set(Flags::P2_INFO, p2_info); if let Some(exec) = fallback_exec { flags.insert(Flags::HAS_FALLBACK_EXEC); if exec { flags.insert(Flags::FALLBACK_EXEC); } } if let Some(exec) = fallback_symlink { flags.insert(Flags::HAS_FALLBACK_SYMLINK); if exec { flags.insert(Flags::FALLBACK_SYMLINK); } } Self { flags, mode_size, mtime, } } pub fn from_v1_data( state: EntryState, mode: i32, size: i32, mtime: i32, ) -> Self { match state { EntryState::Normal => { if size == SIZE_FROM_OTHER_PARENT { Self { // might be missing P1_TRACKED flags: Flags::WDIR_TRACKED | Flags::P2_INFO, mode_size: None, mtime: None, } } else if size == SIZE_NON_NORMAL { Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED, mode_size: None, mtime: None, } } else if mtime == MTIME_UNSET { // TODO: return an error for negative values? let mode = u32::try_from(mode).unwrap(); let size = u32::try_from(size).unwrap(); Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED, mode_size: Some((mode, size)), mtime: None, } } else { // TODO: return an error for negative values? let mode = u32::try_from(mode).unwrap(); let size = u32::try_from(size).unwrap(); let mtime = u32::try_from(mtime).unwrap(); let mtime = TruncatedTimestamp::from_already_truncated( mtime, 0, false, ) .unwrap(); Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED, mode_size: Some((mode, size)), mtime: Some(mtime), } } } EntryState::Added => Self { flags: Flags::WDIR_TRACKED, mode_size: None, mtime: None, }, EntryState::Removed => Self { flags: if size == SIZE_NON_NORMAL { Flags::P1_TRACKED | Flags::P2_INFO } else if size == SIZE_FROM_OTHER_PARENT { // We don’t know if P1_TRACKED should be set (file history) Flags::P2_INFO } else { Flags::P1_TRACKED }, mode_size: None, mtime: None, }, EntryState::Merged => Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED // might not be true because of rename ? | Flags::P2_INFO, // might not be true because of rename ? mode_size: None, mtime: None, }, } } /// Creates a new entry in "removed" state. /// /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or /// `SIZE_FROM_OTHER_PARENT` pub fn new_removed(size: i32) -> Self { Self::from_v1_data(EntryState::Removed, 0, size, 0) } pub fn new_tracked() -> Self { let data = DirstateV2Data { wc_tracked: true, ..Default::default() }; Self::from_v2_data(data) } pub fn tracked(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED) } pub fn p1_tracked(&self) -> bool { self.flags.contains(Flags::P1_TRACKED) } fn in_either_parent(&self) -> bool { self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO) } pub fn removed(&self) -> bool { self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED) } pub fn p2_info(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO) } pub fn added(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent() } pub fn modified(&self) -> bool { self.flags .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO) } pub fn maybe_clean(&self) -> bool { #[allow(clippy::if_same_then_else)] #[allow(clippy::needless_bool)] if !self.flags.contains(Flags::WDIR_TRACKED) { false } else if !self.flags.contains(Flags::P1_TRACKED) { false } else if self.flags.contains(Flags::P2_INFO) { false } else { true } } pub fn any_tracked(&self) -> bool { self.flags.intersects( Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO, ) } pub(crate) fn v2_data(&self) -> DirstateV2Data { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v2_data of an untracked DirstateEntry") } let wc_tracked = self.flags.contains(Flags::WDIR_TRACKED); let p1_tracked = self.flags.contains(Flags::P1_TRACKED); let p2_info = self.flags.contains(Flags::P2_INFO); let mode_size = self.mode_size; let mtime = self.mtime; DirstateV2Data { wc_tracked, p1_tracked, p2_info, mode_size, mtime, fallback_exec: self.get_fallback_exec(), fallback_symlink: self.get_fallback_symlink(), } } fn v1_state(&self) -> EntryState { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v1_state of an untracked DirstateEntry") } if self.removed() { EntryState::Removed } else if self.modified() { EntryState::Merged } else if self.added() { EntryState::Added } else { EntryState::Normal } } fn v1_mode(&self) -> i32 { if let Some((mode, _size)) = self.mode_size { i32::try_from(mode).unwrap() } else { 0 } } fn v1_size(&self) -> i32 { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v1_size of an untracked DirstateEntry") } if self.removed() && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO) { SIZE_NON_NORMAL } else if self.flags.contains(Flags::P2_INFO) { SIZE_FROM_OTHER_PARENT } else if self.removed() { 0 } else if self.added() { SIZE_NON_NORMAL } else if let Some((_mode, size)) = self.mode_size { i32::try_from(size).unwrap() } else { SIZE_NON_NORMAL } } fn v1_mtime(&self) -> i32 { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v1_mtime of an untracked DirstateEntry") } #[allow(clippy::if_same_then_else)] if self.removed() { 0 } else if self.flags.contains(Flags::P2_INFO) { MTIME_UNSET } else if !self.flags.contains(Flags::P1_TRACKED) { MTIME_UNSET } else if let Some(mtime) = self.mtime { if mtime.second_ambiguous { MTIME_UNSET } else { i32::try_from(mtime.truncated_seconds()).unwrap() } } else { MTIME_UNSET } } // TODO: return `Option<EntryState>`? None when `!self.any_tracked` pub fn state(&self) -> EntryState { self.v1_state() } // TODO: return Option? pub fn mode(&self) -> i32 { self.v1_mode() } // TODO: return Option? pub fn size(&self) -> i32 { self.v1_size() } // TODO: return Option? pub fn mtime(&self) -> i32 { self.v1_mtime() } pub fn get_fallback_exec(&self) -> Option<bool> { if self.flags.contains(Flags::HAS_FALLBACK_EXEC) { Some(self.flags.contains(Flags::FALLBACK_EXEC)) } else { None } } pub fn set_fallback_exec(&mut self, value: Option<bool>) { match value { None => { self.flags.remove(Flags::HAS_FALLBACK_EXEC); self.flags.remove(Flags::FALLBACK_EXEC); } Some(exec) => { self.flags.insert(Flags::HAS_FALLBACK_EXEC); if exec { self.flags.insert(Flags::FALLBACK_EXEC); } } } } pub fn get_fallback_symlink(&self) -> Option<bool> { if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) { Some(self.flags.contains(Flags::FALLBACK_SYMLINK)) } else { None } } pub fn set_fallback_symlink(&mut self, value: Option<bool>) { match value { None => { self.flags.remove(Flags::HAS_FALLBACK_SYMLINK); self.flags.remove(Flags::FALLBACK_SYMLINK); } Some(symlink) => { self.flags.insert(Flags::HAS_FALLBACK_SYMLINK); if symlink { self.flags.insert(Flags::FALLBACK_SYMLINK); } } } } pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> { self.mtime } pub fn drop_merge_data(&mut self) { if self.flags.contains(Flags::P2_INFO) { self.flags.remove(Flags::P2_INFO); self.mode_size = None; self.mtime = None; } } pub fn set_possibly_dirty(&mut self) { self.mtime = None } pub fn set_clean( &mut self, mode: u32, size: u32, mtime: TruncatedTimestamp, ) { let size = size & RANGE_MASK_31BIT; self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED); self.mode_size = Some((mode, size)); self.mtime = Some(mtime); } pub fn set_tracked(&mut self) { self.flags.insert(Flags::WDIR_TRACKED); // `set_tracked` is replacing various `normallookup` call. So we mark // the files as needing lookup // // Consider dropping this in the future in favor of something less // broad. self.mtime = None; } pub fn set_untracked(&mut self) { self.flags.remove(Flags::WDIR_TRACKED); self.mode_size = None; self.mtime = None; } /// Returns `(state, mode, size, mtime)` for the puprose of serialization /// in the dirstate-v1 format. /// /// This includes marker values such as `mtime == -1`. In the future we may /// want to not represent these cases that way in memory, but serialization /// will need to keep the same format. pub fn v1_data(&self) -> (u8, i32, i32, i32) { ( self.v1_state().into(), self.v1_mode(), self.v1_size(), self.v1_mtime(), ) } pub(crate) fn is_from_other_parent(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO) } // TODO: other platforms #[cfg(unix)] pub fn mode_changed( &self, filesystem_metadata: &std::fs::Metadata, ) -> bool { let dirstate_exec_bit = (self.mode() as u32 & EXEC_BIT_MASK) != 0; let fs_exec_bit = has_exec_bit(filesystem_metadata); dirstate_exec_bit != fs_exec_bit } /// Returns a `(state, mode, size, mtime)` tuple as for /// `DirstateMapMethods::debug_iter`. pub fn debug_tuple(&self) -> (u8, i32, i32, i32) { (self.state().into(), self.mode(), self.size(), self.mtime()) } } impl EntryState { pub fn is_tracked(self) -> bool { use EntryState::*; match self { Normal | Added | Merged => true, Removed => false, } } } impl TryFrom<u8> for EntryState { type Error = HgError; fn try_from(value: u8) -> Result<Self, Self::Error> { match value { b'n' => Ok(EntryState::Normal), b'a' => Ok(EntryState::Added), b'r' => Ok(EntryState::Removed), b'm' => Ok(EntryState::Merged), _ => Err(HgError::CorruptedRepository(format!( "Incorrect dirstate entry state {}", value ))), } } } impl From<EntryState> for u8 { fn from(val: EntryState) -> Self { match val { EntryState::Normal => b'n', EntryState::Added => b'a', EntryState::Removed => b'r', EntryState::Merged => b'm', } } } const EXEC_BIT_MASK: u32 = 0o100; pub fn has_exec_bit(metadata: &std::fs::Metadata) -> bool { // TODO: How to handle executable permissions on Windows? use std::os::unix::fs::MetadataExt; (metadata.mode() & EXEC_BIT_MASK) != 0 }