Mercurial > hg
view rust/hg-core/src/dirstate/entry.rs @ 52305:79e8118cd846
rust-lib: move `Dirstate*Error` to the `dirstate` module
That's where they belong and should always have been there.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 04 Nov 2024 11:18:36 +0100 |
parents | db065b33fa56 |
children |
line wrap: on
line source
use crate::dirstate::on_disk::DirstateV2ParseError; use crate::errors::HgError; use bitflags::bitflags; use std::fs; use std::io; use std::time::{SystemTime, UNIX_EPOCH}; #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub enum EntryState { Normal, Added, Removed, Merged, } /// `size` and `mtime.seconds` are truncated to 31 bits. /// /// TODO: double-check status algorithm correctness for files /// larger than 2 GiB or modified after 2038. #[derive(Debug, Copy, Clone)] pub struct DirstateEntry { pub(crate) flags: Flags, mode_size: Option<(u32, u32)>, mtime: Option<TruncatedTimestamp>, } bitflags! { pub(crate) struct Flags: u8 { const WDIR_TRACKED = 1 << 0; const P1_TRACKED = 1 << 1; const P2_INFO = 1 << 2; const HAS_FALLBACK_EXEC = 1 << 3; const FALLBACK_EXEC = 1 << 4; const HAS_FALLBACK_SYMLINK = 1 << 5; const FALLBACK_SYMLINK = 1 << 6; } } /// A Unix timestamp with nanoseconds precision #[derive(Debug, Copy, Clone)] pub struct TruncatedTimestamp { truncated_seconds: u32, /// Always in the `0 .. 1_000_000_000` range. nanoseconds: u32, /// TODO this should be in DirstateEntry, but the current code needs /// refactoring to use DirstateEntry instead of TruncatedTimestamp for /// comparison. pub second_ambiguous: bool, } impl TruncatedTimestamp { /// Constructs from a timestamp potentially outside of the supported range, /// and truncate the seconds components to its lower 31 bits. /// /// Panics if the nanoseconds components is not in the expected range. pub fn new_truncate( seconds: i64, nanoseconds: u32, second_ambiguous: bool, ) -> Self { assert!(nanoseconds < NSEC_PER_SEC); Self { truncated_seconds: seconds as u32 & RANGE_MASK_31BIT, nanoseconds, second_ambiguous, } } /// Construct from components. Returns an error if they are not in the /// expcted range. pub fn from_already_truncated( truncated_seconds: u32, nanoseconds: u32, second_ambiguous: bool, ) -> Result<Self, DirstateV2ParseError> { if truncated_seconds & !RANGE_MASK_31BIT == 0 && nanoseconds < NSEC_PER_SEC { Ok(Self { truncated_seconds, nanoseconds, second_ambiguous, }) } else { Err(DirstateV2ParseError::new("when reading datetime")) } } /// Returns a `TruncatedTimestamp` for the modification time of `metadata`. /// /// Propagates errors from `std` on platforms where modification time /// is not available at all. pub fn for_mtime_of(metadata: &fs::Metadata) -> io::Result<Self> { #[cfg(unix)] { use std::os::unix::fs::MetadataExt; let seconds = metadata.mtime(); // i64 -> u32 with value always in the `0 .. NSEC_PER_SEC` range let nanoseconds = metadata.mtime_nsec().try_into().unwrap(); Ok(Self::new_truncate(seconds, nanoseconds, false)) } #[cfg(not(unix))] { metadata.modified().map(Self::from) } } /// Like `for_mtime_of`, but may return `None` or a value with /// `second_ambiguous` set if the mtime is not "reliable". /// /// A modification time is reliable if it is older than `boundary` (or /// sufficiently in the future). /// /// Otherwise a concurrent modification might happens with the same mtime. pub fn for_reliable_mtime_of( metadata: &fs::Metadata, boundary: &Self, ) -> io::Result<Option<Self>> { Ok(Self::for_mtime_of(metadata)?.for_reliable_mtime_of_self(boundary)) } /// See [`Self::for_reliable_mtime_of`] pub fn for_reliable_mtime_of_self(&self, boundary: &Self) -> Option<Self> { let mut new = *self; // If the mtime of the ambiguous file is younger (or equal) to the // starting point of the `status` walk, we cannot garantee that // another, racy, write will not happen right after with the same mtime // and we cannot cache the information. // // However if the mtime is far away in the future, this is likely some // mismatch between the current clock and previous file system // operation. So mtime more than one days in the future are considered // fine. let reliable = if self.truncated_seconds == boundary.truncated_seconds { new.second_ambiguous = true; self.nanoseconds != 0 && boundary.nanoseconds != 0 && self.nanoseconds < boundary.nanoseconds } else { // `truncated_seconds` is less than 2**31, // so this does not overflow `u32`: let one_day_later = boundary.truncated_seconds + 24 * 3600; self.truncated_seconds < boundary.truncated_seconds || self.truncated_seconds > one_day_later }; if reliable { Some(new) } else { None } } /// The lower 31 bits of the number of seconds since the epoch. pub fn truncated_seconds(&self) -> u32 { self.truncated_seconds } /// The sub-second component of this timestamp, in nanoseconds. /// Always in the `0 .. 1_000_000_000` range. /// /// This timestamp is after `(seconds, 0)` by this many nanoseconds. pub fn nanoseconds(&self) -> u32 { self.nanoseconds } /// Returns whether two timestamps are equal modulo 2**31 seconds. /// /// If this returns `true`, the original values converted from `SystemTime` /// or given to `new_truncate` were very likely equal. A false positive is /// possible if they were exactly a multiple of 2**31 seconds apart (around /// 68 years). This is deemed very unlikely to happen by chance, especially /// on filesystems that support sub-second precision. /// /// If someone is manipulating the modification times of some files to /// intentionally make `hg status` return incorrect results, not truncating /// wouldn’t help much since they can set exactly the expected timestamp. /// /// Sub-second precision is ignored if it is zero in either value. /// Some APIs simply return zero when more precision is not available. /// When comparing values from different sources, if only one is truncated /// in that way, doing a simple comparison would cause many false /// negatives. pub fn likely_equal(self, other: Self) -> bool { if self.truncated_seconds != other.truncated_seconds { false } else if self.nanoseconds == 0 || other.nanoseconds == 0 { !self.second_ambiguous } else { self.nanoseconds == other.nanoseconds } } pub fn likely_equal_to_mtime_of( self, metadata: &fs::Metadata, ) -> io::Result<bool> { Ok(self.likely_equal(Self::for_mtime_of(metadata)?)) } } impl From<SystemTime> for TruncatedTimestamp { fn from(system_time: SystemTime) -> Self { // On Unix, `SystemTime` is a wrapper for the `timespec` C struct: // https://www.gnu.org/software/libc/manual/html_node/Time-Types.html#index-struct-timespec // We want to effectively access its fields, but the Rust standard // library does not expose them. The best we can do is: let seconds; let nanoseconds; match system_time.duration_since(UNIX_EPOCH) { Ok(duration) => { seconds = duration.as_secs() as i64; nanoseconds = duration.subsec_nanos(); } Err(error) => { // `system_time` is before `UNIX_EPOCH`. // We need to undo this algorithm: // https://github.com/rust-lang/rust/blob/6bed1f0bc3cc50c10aab26d5f94b16a00776b8a5/library/std/src/sys/unix/time.rs#L40-L41 let negative = error.duration(); let negative_secs = negative.as_secs() as i64; let negative_nanos = negative.subsec_nanos(); if negative_nanos == 0 { seconds = -negative_secs; nanoseconds = 0; } else { // For example if `system_time` was 4.3 seconds before // the Unix epoch we get a Duration that represents // `(-4, -0.3)` but we want `(-5, +0.7)`: seconds = -1 - negative_secs; nanoseconds = NSEC_PER_SEC - negative_nanos; } } }; Self::new_truncate(seconds, nanoseconds, false) } } const NSEC_PER_SEC: u32 = 1_000_000_000; pub const RANGE_MASK_31BIT: u32 = 0x7FFF_FFFF; pub const MTIME_UNSET: i32 = -1; /// A `DirstateEntry` with a size of `-2` means that it was merged from the /// other parent. This allows revert to pick the right status back during a /// merge. pub const SIZE_FROM_OTHER_PARENT: i32 = -2; /// A special value used for internal representation of special case in /// dirstate v1 format. pub const SIZE_NON_NORMAL: i32 = -1; #[derive(Debug, Default, Copy, Clone)] pub struct DirstateV2Data { pub wc_tracked: bool, pub p1_tracked: bool, pub p2_info: bool, pub mode_size: Option<(u32, u32)>, pub mtime: Option<TruncatedTimestamp>, pub fallback_exec: Option<bool>, pub fallback_symlink: Option<bool>, } #[derive(Debug, Default, Copy, Clone)] pub struct ParentFileData { pub mode_size: Option<(u32, u32)>, pub mtime: Option<TruncatedTimestamp>, } impl DirstateEntry { pub fn from_v2_data(v2_data: DirstateV2Data) -> Self { let DirstateV2Data { wc_tracked, p1_tracked, p2_info, mode_size, mtime, fallback_exec, fallback_symlink, } = v2_data; if let Some((mode, size)) = mode_size { // TODO: return an error for out of range values? assert!(mode & !RANGE_MASK_31BIT == 0); assert!(size & !RANGE_MASK_31BIT == 0); } let mut flags = Flags::empty(); flags.set(Flags::WDIR_TRACKED, wc_tracked); flags.set(Flags::P1_TRACKED, p1_tracked); flags.set(Flags::P2_INFO, p2_info); if let Some(exec) = fallback_exec { flags.insert(Flags::HAS_FALLBACK_EXEC); if exec { flags.insert(Flags::FALLBACK_EXEC); } } if let Some(exec) = fallback_symlink { flags.insert(Flags::HAS_FALLBACK_SYMLINK); if exec { flags.insert(Flags::FALLBACK_SYMLINK); } } Self { flags, mode_size, mtime, } } pub fn from_v1_data( state: EntryState, mode: i32, size: i32, mtime: i32, ) -> Self { match state { EntryState::Normal => { if size == SIZE_FROM_OTHER_PARENT { Self { // might be missing P1_TRACKED flags: Flags::WDIR_TRACKED | Flags::P2_INFO, mode_size: None, mtime: None, } } else if size == SIZE_NON_NORMAL { Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED, mode_size: None, mtime: None, } } else if mtime == MTIME_UNSET { // TODO: return an error for negative values? let mode = u32::try_from(mode).unwrap(); let size = u32::try_from(size).unwrap(); Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED, mode_size: Some((mode, size)), mtime: None, } } else { // TODO: return an error for negative values? let mode = u32::try_from(mode).unwrap(); let size = u32::try_from(size).unwrap(); let mtime = u32::try_from(mtime).unwrap(); let mtime = TruncatedTimestamp::from_already_truncated( mtime, 0, false, ) .unwrap(); Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED, mode_size: Some((mode, size)), mtime: Some(mtime), } } } EntryState::Added => Self { flags: Flags::WDIR_TRACKED, mode_size: None, mtime: None, }, EntryState::Removed => Self { flags: if size == SIZE_NON_NORMAL { Flags::P1_TRACKED | Flags::P2_INFO } else if size == SIZE_FROM_OTHER_PARENT { // We don’t know if P1_TRACKED should be set (file history) Flags::P2_INFO } else { Flags::P1_TRACKED }, mode_size: None, mtime: None, }, EntryState::Merged => Self { flags: Flags::WDIR_TRACKED | Flags::P1_TRACKED // might not be true because of rename ? | Flags::P2_INFO, // might not be true because of rename ? mode_size: None, mtime: None, }, } } /// Creates a new entry in "removed" state. /// /// `size` is expected to be zero, `SIZE_NON_NORMAL`, or /// `SIZE_FROM_OTHER_PARENT` pub fn new_removed(size: i32) -> Self { Self::from_v1_data(EntryState::Removed, 0, size, 0) } pub fn new_tracked() -> Self { let data = DirstateV2Data { wc_tracked: true, ..Default::default() }; Self::from_v2_data(data) } pub fn tracked(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED) } pub fn p1_tracked(&self) -> bool { self.flags.contains(Flags::P1_TRACKED) } fn in_either_parent(&self) -> bool { self.flags.intersects(Flags::P1_TRACKED | Flags::P2_INFO) } pub fn removed(&self) -> bool { self.in_either_parent() && !self.flags.contains(Flags::WDIR_TRACKED) } pub fn p2_info(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO) } pub fn added(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED) && !self.in_either_parent() } pub fn modified(&self) -> bool { self.flags .contains(Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO) } pub fn maybe_clean(&self) -> bool { #[allow(clippy::if_same_then_else)] #[allow(clippy::needless_bool)] if !self.flags.contains(Flags::WDIR_TRACKED) { false } else if !self.flags.contains(Flags::P1_TRACKED) { false } else if self.flags.contains(Flags::P2_INFO) { false } else { true } } pub fn any_tracked(&self) -> bool { self.flags.intersects( Flags::WDIR_TRACKED | Flags::P1_TRACKED | Flags::P2_INFO, ) } pub(crate) fn v2_data(&self) -> DirstateV2Data { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v2_data of an untracked DirstateEntry") } let wc_tracked = self.flags.contains(Flags::WDIR_TRACKED); let p1_tracked = self.flags.contains(Flags::P1_TRACKED); let p2_info = self.flags.contains(Flags::P2_INFO); let mode_size = self.mode_size; let mtime = self.mtime; DirstateV2Data { wc_tracked, p1_tracked, p2_info, mode_size, mtime, fallback_exec: self.get_fallback_exec(), fallback_symlink: self.get_fallback_symlink(), } } fn v1_state(&self) -> EntryState { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v1_state of an untracked DirstateEntry") } if self.removed() { EntryState::Removed } else if self.modified() { EntryState::Merged } else if self.added() { EntryState::Added } else { EntryState::Normal } } fn v1_mode(&self) -> i32 { if let Some((mode, _size)) = self.mode_size { i32::try_from(mode).unwrap() } else { 0 } } fn v1_size(&self) -> i32 { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v1_size of an untracked DirstateEntry") } if self.removed() && self.flags.contains(Flags::P1_TRACKED | Flags::P2_INFO) { SIZE_NON_NORMAL } else if self.flags.contains(Flags::P2_INFO) { SIZE_FROM_OTHER_PARENT } else if self.removed() { 0 } else if self.added() { SIZE_NON_NORMAL } else if let Some((_mode, size)) = self.mode_size { i32::try_from(size).unwrap() } else { SIZE_NON_NORMAL } } fn v1_mtime(&self) -> i32 { if !self.any_tracked() { // TODO: return an Option instead? panic!("Accessing v1_mtime of an untracked DirstateEntry") } #[allow(clippy::if_same_then_else)] if self.removed() { 0 } else if self.flags.contains(Flags::P2_INFO) { MTIME_UNSET } else if !self.flags.contains(Flags::P1_TRACKED) { MTIME_UNSET } else if let Some(mtime) = self.mtime { if mtime.second_ambiguous { MTIME_UNSET } else { i32::try_from(mtime.truncated_seconds()).unwrap() } } else { MTIME_UNSET } } // TODO: return `Option<EntryState>`? None when `!self.any_tracked` pub fn state(&self) -> EntryState { self.v1_state() } // TODO: return Option? pub fn mode(&self) -> i32 { self.v1_mode() } // TODO: return Option? pub fn size(&self) -> i32 { self.v1_size() } // TODO: return Option? pub fn mtime(&self) -> i32 { self.v1_mtime() } pub fn get_fallback_exec(&self) -> Option<bool> { if self.flags.contains(Flags::HAS_FALLBACK_EXEC) { Some(self.flags.contains(Flags::FALLBACK_EXEC)) } else { None } } pub fn set_fallback_exec(&mut self, value: Option<bool>) { match value { None => { self.flags.remove(Flags::HAS_FALLBACK_EXEC); self.flags.remove(Flags::FALLBACK_EXEC); } Some(exec) => { self.flags.insert(Flags::HAS_FALLBACK_EXEC); if exec { self.flags.insert(Flags::FALLBACK_EXEC); } } } } pub fn get_fallback_symlink(&self) -> Option<bool> { if self.flags.contains(Flags::HAS_FALLBACK_SYMLINK) { Some(self.flags.contains(Flags::FALLBACK_SYMLINK)) } else { None } } pub fn set_fallback_symlink(&mut self, value: Option<bool>) { match value { None => { self.flags.remove(Flags::HAS_FALLBACK_SYMLINK); self.flags.remove(Flags::FALLBACK_SYMLINK); } Some(symlink) => { self.flags.insert(Flags::HAS_FALLBACK_SYMLINK); if symlink { self.flags.insert(Flags::FALLBACK_SYMLINK); } } } } pub fn truncated_mtime(&self) -> Option<TruncatedTimestamp> { self.mtime } pub fn drop_merge_data(&mut self) { if self.flags.contains(Flags::P2_INFO) { self.flags.remove(Flags::P2_INFO); self.mode_size = None; self.mtime = None; } } pub fn set_possibly_dirty(&mut self) { self.mtime = None } pub fn set_clean( &mut self, mode: u32, size: u32, mtime: TruncatedTimestamp, ) { let size = size & RANGE_MASK_31BIT; self.flags.insert(Flags::WDIR_TRACKED | Flags::P1_TRACKED); self.mode_size = Some((mode, size)); self.mtime = Some(mtime); } pub fn set_tracked(&mut self) { self.flags.insert(Flags::WDIR_TRACKED); // `set_tracked` is replacing various `normallookup` call. So we mark // the files as needing lookup // // Consider dropping this in the future in favor of something less // broad. self.mtime = None; } pub fn set_untracked(&mut self) { self.flags.remove(Flags::WDIR_TRACKED); self.mode_size = None; self.mtime = None; } /// Returns `(state, mode, size, mtime)` for the puprose of serialization /// in the dirstate-v1 format. /// /// This includes marker values such as `mtime == -1`. In the future we may /// want to not represent these cases that way in memory, but serialization /// will need to keep the same format. pub fn v1_data(&self) -> (u8, i32, i32, i32) { ( self.v1_state().into(), self.v1_mode(), self.v1_size(), self.v1_mtime(), ) } pub(crate) fn is_from_other_parent(&self) -> bool { self.flags.contains(Flags::WDIR_TRACKED | Flags::P2_INFO) } // TODO: other platforms #[cfg(unix)] pub fn mode_changed( &self, filesystem_metadata: &std::fs::Metadata, ) -> bool { let dirstate_exec_bit = (self.mode() as u32 & EXEC_BIT_MASK) != 0; let fs_exec_bit = has_exec_bit(filesystem_metadata); dirstate_exec_bit != fs_exec_bit } /// Returns a `(state, mode, size, mtime)` tuple as for /// `DirstateMapMethods::debug_iter`. pub fn debug_tuple(&self) -> (u8, i32, i32, i32) { (self.state().into(), self.mode(), self.size(), self.mtime()) } } impl EntryState { pub fn is_tracked(self) -> bool { use EntryState::*; match self { Normal | Added | Merged => true, Removed => false, } } } impl TryFrom<u8> for EntryState { type Error = HgError; fn try_from(value: u8) -> Result<Self, Self::Error> { match value { b'n' => Ok(EntryState::Normal), b'a' => Ok(EntryState::Added), b'r' => Ok(EntryState::Removed), b'm' => Ok(EntryState::Merged), _ => Err(HgError::CorruptedRepository(format!( "Incorrect dirstate entry state {}", value ))), } } } impl From<EntryState> for u8 { fn from(val: EntryState) -> Self { match val { EntryState::Normal => b'n', EntryState::Added => b'a', EntryState::Removed => b'r', EntryState::Merged => b'm', } } } const EXEC_BIT_MASK: u32 = 0o100; pub fn has_exec_bit(metadata: &std::fs::Metadata) -> bool { // TODO: How to handle executable permissions on Windows? use std::os::unix::fs::MetadataExt; (metadata.mode() & EXEC_BIT_MASK) != 0 }