Mercurial > hg
changeset 47348:a4de570e61fa
dirstate-v2: Allow tree nodes without an entry to store a timestamp
Timestamps are stored on 96 bits:
* 64 bits for the signed number of seconds since the Unix epoch
* 32 bits for the nanoseconds in the `0 <= ns < 1_000_000_000` range
For now timestamps are not used or set yet.
Differential Revision: https://phab.mercurial-scm.org/D10825
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Thu, 27 May 2021 18:40:54 +0200 |
parents | 73ddcedeaadf |
children | 7138c863d0a1 |
files | rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/on_disk.rs |
diffstat | 2 files changed, 200 insertions(+), 75 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri May 28 20:07:27 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Thu May 27 18:40:54 2021 +0200 @@ -295,18 +295,13 @@ } } - pub(super) fn has_entry(&self) -> bool { - match self { - NodeRef::InMemory(_path, node) => node.entry.is_some(), - NodeRef::OnDisk(node) => node.has_entry(), - } - } - pub(super) fn entry( &self, ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { match self { - NodeRef::InMemory(_path, node) => Ok(node.entry), + NodeRef::InMemory(_path, node) => { + Ok(node.data.as_entry().copied()) + } NodeRef::OnDisk(node) => node.entry(), } } @@ -316,7 +311,7 @@ ) -> Result<Option<EntryState>, DirstateV2ParseError> { match self { NodeRef::InMemory(_path, node) => { - Ok(node.entry.as_ref().map(|entry| entry.state)) + Ok(node.data.as_entry().map(|entry| entry.state)) } NodeRef::OnDisk(node) => node.state(), } @@ -333,8 +328,7 @@ /// Represents a file or a directory #[derive(Default)] pub(super) struct Node<'on_disk> { - /// `None` for directories - pub(super) entry: Option<DirstateEntry>, + pub(super) data: NodeData, pub(super) copy_source: Option<Cow<'on_disk, HgPath>>, @@ -344,6 +338,34 @@ pub(super) tracked_descendants_count: u32, } +pub(super) enum NodeData { + Entry(DirstateEntry), + CachedDirectory { mtime: on_disk::Timestamp }, + None, +} + +impl Default for NodeData { + fn default() -> Self { + NodeData::None + } +} + +impl NodeData { + fn has_entry(&self) -> bool { + match self { + NodeData::Entry(_) => true, + _ => false, + } + } + + fn as_entry(&self) -> Option<&DirstateEntry> { + match self { + NodeData::Entry(entry) => Some(entry), + _ => None, + } + } +} + impl<'on_disk> DirstateMap<'on_disk> { pub(super) fn empty(on_disk: &'on_disk [u8]) -> Self { Self { @@ -386,14 +408,14 @@ }, )?; assert!( - node.entry.is_none(), + !node.data.has_entry(), "duplicate dirstate entry in read" ); assert!( node.copy_source.is_none(), "duplicate dirstate entry in read" ); - node.entry = Some(*entry); + node.data = NodeData::Entry(*entry); node.copy_source = copy_source.map(Cow::Borrowed); map.nodes_with_entry_count += 1; if copy_source.is_some() { @@ -519,10 +541,10 @@ } }, )?; - if node.entry.is_none() { + if !node.data.has_entry() { self.nodes_with_entry_count += 1 } - node.entry = Some(new_entry); + node.data = NodeData::Entry(new_entry); Ok(()) } @@ -587,7 +609,7 @@ &mut self.root, path.as_ref(), )? { - if let Some(entry) = node.entry.as_mut() { + if let NodeData::Entry(entry) = &mut node.data { entry.clear_mtime(); } } @@ -703,18 +725,22 @@ return Ok(None); } } else { + let had_entry = node.data.has_entry(); + if had_entry { + node.data = NodeData::None + } dropped = Dropped { was_tracked: node - .entry - .as_ref() + .data + .as_entry() .map_or(false, |entry| entry.state.is_tracked()), - had_entry: node.entry.take().is_some(), + had_entry, had_copy_source: node.copy_source.take().is_some(), }; } // After recursion, for both leaf (rest_of_path is None) nodes and // parent nodes, remove a node if it just became empty. - if node.entry.is_none() + if !node.data.has_entry() && node.copy_source.is_none() && node.children.is_empty() { @@ -746,7 +772,7 @@ if let Some(node) = Self::get_node_mut(self.on_disk, &mut self.root, &filename)? { - if let Some(entry) = node.entry.as_mut() { + if let NodeData::Entry(entry) = &mut node.data { entry.clear_ambiguous_mtime(now); } } @@ -815,7 +841,8 @@ if let Some(node) = self.get_node(directory)? { // A node without a `DirstateEntry` was created to hold child // nodes, and is therefore a directory. - Ok(!node.has_entry() && node.tracked_descendants_count() > 0) + let state = node.state()?; + Ok(state.is_none() && node.tracked_descendants_count() > 0) } else { Ok(false) } @@ -825,7 +852,7 @@ if let Some(node) = self.get_node(directory)? { // A node without a `DirstateEntry` was created to hold child // nodes, and is therefore a directory. - Ok(!node.has_entry()) + Ok(node.state()?.is_none()) } else { Ok(false) }
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs Fri May 28 20:07:27 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Thu May 27 18:40:54 2021 +0200 @@ -17,10 +17,11 @@ use crate::DirstateError; use crate::DirstateParents; use crate::EntryState; -use bytes_cast::unaligned::{I32Be, U32Be, U64Be}; +use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be}; use bytes_cast::BytesCast; use std::borrow::Cow; -use std::convert::{TryFrom, TryInto}; +use std::convert::TryFrom; +use std::time::{Duration, SystemTime, UNIX_EPOCH}; /// Added at the start of `.hg/dirstate` when the "v2" format is used. /// This a redundant sanity check more than an actual "magic number" since @@ -50,22 +51,43 @@ base_name_start: Size, copy_source: OptPathSlice, - entry: OptEntry, children: ChildNodes, pub(super) tracked_descendants_count: Size, + + /// Dependending on the value of `state`: + /// + /// * A null byte: `data` represents nothing + /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together + /// represents a dirstate entry like in the v1 format. + /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted + /// as the `Timestamp` for the mtime of a cached directory. + /// + /// TODO: document directory caching + state: u8, + data: Entry, } -/// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1 -/// format #[derive(BytesCast, Copy, Clone)] #[repr(C)] -struct OptEntry { - state: u8, +struct Entry { mode: I32Be, mtime: I32Be, size: I32Be, } +/// Duration since the Unix epoch +#[derive(BytesCast, Copy, Clone)] +#[repr(C)] +pub(super) struct Timestamp { + seconds: I64Be, + + /// In `0 .. 1_000_000_000`. + /// + /// This timestamp is later or earlier than `(seconds, 0)` by this many + /// nanoseconds, if `seconds` is non-negative or negative, respectively. + nanoseconds: U32Be, +} + /// Counted in bytes from the start of the file /// /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB @@ -216,34 +238,54 @@ }) } - pub(super) fn has_entry(&self) -> bool { - self.entry.state != b'\0' + pub(super) fn node_data( + &self, + ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> { + let entry = |state| { + dirstate_map::NodeData::Entry(self.entry_with_given_state(state)) + }; + + match self.state { + b'\0' => Ok(dirstate_map::NodeData::None), + b'd' => Ok(dirstate_map::NodeData::CachedDirectory { + mtime: *self.data.as_timestamp(), + }), + b'n' => Ok(entry(EntryState::Normal)), + b'a' => Ok(entry(EntryState::Added)), + b'r' => Ok(entry(EntryState::Removed)), + b'm' => Ok(entry(EntryState::Merged)), + _ => Err(DirstateV2ParseError), + } } pub(super) fn state( &self, ) -> Result<Option<EntryState>, DirstateV2ParseError> { - Ok(if self.has_entry() { - Some( - self.entry - .state - .try_into() - .map_err(|_| DirstateV2ParseError)?, - ) - } else { - None - }) + match self.state { + b'\0' | b'd' => Ok(None), + b'n' => Ok(Some(EntryState::Normal)), + b'a' => Ok(Some(EntryState::Added)), + b'r' => Ok(Some(EntryState::Removed)), + b'm' => Ok(Some(EntryState::Merged)), + _ => Err(DirstateV2ParseError), + } + } + + fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry { + DirstateEntry { + state, + mode: self.data.mode.get(), + mtime: self.data.mtime.get(), + size: self.data.size.get(), + } } pub(super) fn entry( &self, ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { - Ok(self.state()?.map(|state| DirstateEntry { - state, - mode: self.entry.mode.get(), - mtime: self.entry.mtime.get(), - size: self.entry.size.get(), - })) + Ok(self + .state()? + .map(|state| self.entry_with_given_state(state))) } pub(super) fn children<'on_disk>( @@ -262,12 +304,58 @@ self.children(on_disk)?, ), copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), - entry: self.entry()?, + data: self.node_data()?, tracked_descendants_count: self.tracked_descendants_count.get(), }) } } +impl Entry { + fn from_timestamp(timestamp: Timestamp) -> Self { + // Safety: both types implement the `ByteCast` trait, so we could + // safely use `as_bytes` and `from_bytes` to do this conversion. Using + // `transmute` instead makes the compiler check that the two types + // have the same size, which eliminates the error case of + // `from_bytes`. + unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) } + } + + fn as_timestamp(&self) -> &Timestamp { + // Safety: same as above in `from_timestamp` + unsafe { &*(self as *const Entry as *const Timestamp) } + } +} + +impl From<&'_ SystemTime> for Timestamp { + fn from(system_time: &'_ SystemTime) -> Self { + let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) { + Ok(duration) => { + (duration.as_secs() as i64, duration.subsec_nanos()) + } + Err(error) => { + let negative = error.duration(); + (-(negative.as_secs() as i64), negative.subsec_nanos()) + } + }; + Timestamp { + seconds: secs.into(), + nanoseconds: nanos.into(), + } + } +} + +impl From<&'_ Timestamp> for SystemTime { + fn from(timestamp: &'_ Timestamp) -> Self { + let secs = timestamp.seconds.get(); + let nanos = timestamp.nanoseconds.get(); + if secs >= 0 { + UNIX_EPOCH + Duration::new(secs as u64, nanos) + } else { + UNIX_EPOCH - Duration::new((-secs) as u64, nanos) + } + } +} + fn read_hg_path( on_disk: &[u8], slice: Slice, @@ -356,33 +444,43 @@ } }; on_disk_nodes.push(match node { - NodeRef::InMemory(path, node) => Node { - children, - copy_source, - full_path, - base_name_start: u32::try_from(path.base_name_start()) - // Could only panic for paths over 4 GiB - .expect("dirstate-v2 offset overflow") - .into(), - tracked_descendants_count: node - .tracked_descendants_count - .into(), - entry: if let Some(entry) = &node.entry { - OptEntry { - state: entry.state.into(), - mode: entry.mode.into(), - mtime: entry.mtime.into(), - size: entry.size.into(), + NodeRef::InMemory(path, node) => { + let (state, data) = match &node.data { + dirstate_map::NodeData::Entry(entry) => ( + entry.state.into(), + Entry { + mode: entry.mode.into(), + mtime: entry.mtime.into(), + size: entry.size.into(), + }, + ), + dirstate_map::NodeData::CachedDirectory { mtime } => { + (b'd', Entry::from_timestamp(*mtime)) } - } else { - OptEntry { - state: b'\0', - mode: 0.into(), - mtime: 0.into(), - size: 0.into(), - } - }, - }, + dirstate_map::NodeData::None => ( + b'\0', + Entry { + mode: 0.into(), + mtime: 0.into(), + size: 0.into(), + }, + ), + }; + Node { + children, + copy_source, + full_path, + base_name_start: u32::try_from(path.base_name_start()) + // Could only panic for paths over 4 GiB + .expect("dirstate-v2 offset overflow") + .into(), + tracked_descendants_count: node + .tracked_descendants_count + .into(), + state, + data, + } + } NodeRef::OnDisk(node) => Node { children, copy_source,