comparison rust/hg-core/src/dirstate_tree/on_disk.rs @ 47348:a4de570e61fa

dirstate-v2: Allow tree nodes without an entry to store a timestamp

Timestamps are stored on 96 bits:

* 64 bits for the signed number of seconds since the Unix epoch
* 32 bits for the nanoseconds in the `0 <= ns < 1_000_000_000` range

Timestamps are not used or set yet.

Differential Revision: https://phab.mercurial-scm.org/D10825
author Simon Sapin <simon.sapin@octobus.net>
date Thu, 27 May 2021 18:40:54 +0200
parents 0654b3b3d2b5
children 7138c863d0a1
comparison
equal deleted inserted replaced
47347:73ddcedeaadf 47348:a4de570e61fa
15 use crate::utils::hg_path::HgPath; 15 use crate::utils::hg_path::HgPath;
16 use crate::DirstateEntry; 16 use crate::DirstateEntry;
17 use crate::DirstateError; 17 use crate::DirstateError;
18 use crate::DirstateParents; 18 use crate::DirstateParents;
19 use crate::EntryState; 19 use crate::EntryState;
20 use bytes_cast::unaligned::{I32Be, U32Be, U64Be}; 20 use bytes_cast::unaligned::{I32Be, I64Be, U32Be, U64Be};
21 use bytes_cast::BytesCast; 21 use bytes_cast::BytesCast;
22 use std::borrow::Cow; 22 use std::borrow::Cow;
23 use std::convert::{TryFrom, TryInto}; 23 use std::convert::TryFrom;
24 use std::time::{Duration, SystemTime, UNIX_EPOCH};
24 25
25 /// Added at the start of `.hg/dirstate` when the "v2" format is used. 26 /// Added at the start of `.hg/dirstate` when the "v2" format is used.
26 /// This is a redundant sanity check more than an actual "magic number" since 27 /// This is a redundant sanity check more than an actual "magic number" since
27 /// `.hg/requires` already governs which format should be used. 28 /// `.hg/requires` already governs which format should be used.
28 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n"; 29 pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
48 49
49 /// In bytes from `self.full_path.start` 50 /// In bytes from `self.full_path.start`
50 base_name_start: Size, 51 base_name_start: Size,
51 52
52 copy_source: OptPathSlice, 53 copy_source: OptPathSlice,
53 entry: OptEntry,
54 children: ChildNodes, 54 children: ChildNodes,
55 pub(super) tracked_descendants_count: Size, 55 pub(super) tracked_descendants_count: Size,
56 } 56
57 57 /// Depending on the value of `state`:
58 /// Either nothing if `state == b'\0'`, or a dirstate entry like in the v1 58 ///
59 /// format 59 /// * A null byte: `data` represents nothing
60 /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
61 /// represent a dirstate entry like in the v1 format.
62 /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
63 /// as the `Timestamp` for the mtime of a cached directory.
64 ///
65 /// TODO: document directory caching
66 state: u8,
67 data: Entry,
68 }
69
60 #[derive(BytesCast, Copy, Clone)] 70 #[derive(BytesCast, Copy, Clone)]
61 #[repr(C)] 71 #[repr(C)]
62 struct OptEntry { 72 struct Entry {
63 state: u8,
64 mode: I32Be, 73 mode: I32Be,
65 mtime: I32Be, 74 mtime: I32Be,
66 size: I32Be, 75 size: I32Be,
76 }
77
78 /// Duration since the Unix epoch
79 #[derive(BytesCast, Copy, Clone)]
80 #[repr(C)]
81 pub(super) struct Timestamp {
82 seconds: I64Be,
83
84 /// In `0 .. 1_000_000_000`.
85 ///
86 /// This timestamp is later or earlier than `(seconds, 0)` by this many
87 /// nanoseconds, if `seconds` is non-negative or negative, respectively.
88 nanoseconds: U32Be,
67 } 89 }
68 90
69 /// Counted in bytes from the start of the file 91 /// Counted in bytes from the start of the file
70 /// 92 ///
71 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB 93 /// NOTE: If we decide to never support `.hg/dirstate` files larger than 4 GiB
214 } else { 236 } else {
215 None 237 None
216 }) 238 })
217 } 239 }
218 240
219 pub(super) fn has_entry(&self) -> bool { 241 pub(super) fn node_data(
220 self.entry.state != b'\0' 242 &self,
243 ) -> Result<dirstate_map::NodeData, DirstateV2ParseError> {
244 let entry = |state| {
245 dirstate_map::NodeData::Entry(self.entry_with_given_state(state))
246 };
247
248 match self.state {
249 b'\0' => Ok(dirstate_map::NodeData::None),
250 b'd' => Ok(dirstate_map::NodeData::CachedDirectory {
251 mtime: *self.data.as_timestamp(),
252 }),
253 b'n' => Ok(entry(EntryState::Normal)),
254 b'a' => Ok(entry(EntryState::Added)),
255 b'r' => Ok(entry(EntryState::Removed)),
256 b'm' => Ok(entry(EntryState::Merged)),
257 _ => Err(DirstateV2ParseError),
258 }
221 } 259 }
222 260
223 pub(super) fn state( 261 pub(super) fn state(
224 &self, 262 &self,
225 ) -> Result<Option<EntryState>, DirstateV2ParseError> { 263 ) -> Result<Option<EntryState>, DirstateV2ParseError> {
226 Ok(if self.has_entry() { 264 match self.state {
227 Some( 265 b'\0' | b'd' => Ok(None),
228 self.entry 266 b'n' => Ok(Some(EntryState::Normal)),
229 .state 267 b'a' => Ok(Some(EntryState::Added)),
230 .try_into() 268 b'r' => Ok(Some(EntryState::Removed)),
231 .map_err(|_| DirstateV2ParseError)?, 269 b'm' => Ok(Some(EntryState::Merged)),
232 ) 270 _ => Err(DirstateV2ParseError),
233 } else { 271 }
234 None 272 }
235 }) 273
274 fn entry_with_given_state(&self, state: EntryState) -> DirstateEntry {
275 DirstateEntry {
276 state,
277 mode: self.data.mode.get(),
278 mtime: self.data.mtime.get(),
279 size: self.data.size.get(),
280 }
236 } 281 }
237 282
238 pub(super) fn entry( 283 pub(super) fn entry(
239 &self, 284 &self,
240 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> { 285 ) -> Result<Option<DirstateEntry>, DirstateV2ParseError> {
241 Ok(self.state()?.map(|state| DirstateEntry { 286 Ok(self
242 state, 287 .state()?
243 mode: self.entry.mode.get(), 288 .map(|state| self.entry_with_given_state(state)))
244 mtime: self.entry.mtime.get(),
245 size: self.entry.size.get(),
246 }))
247 } 289 }
248 290
249 pub(super) fn children<'on_disk>( 291 pub(super) fn children<'on_disk>(
250 &self, 292 &self,
251 on_disk: &'on_disk [u8], 293 on_disk: &'on_disk [u8],
260 Ok(dirstate_map::Node { 302 Ok(dirstate_map::Node {
261 children: dirstate_map::ChildNodes::OnDisk( 303 children: dirstate_map::ChildNodes::OnDisk(
262 self.children(on_disk)?, 304 self.children(on_disk)?,
263 ), 305 ),
264 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed), 306 copy_source: self.copy_source(on_disk)?.map(Cow::Borrowed),
265 entry: self.entry()?, 307 data: self.node_data()?,
266 tracked_descendants_count: self.tracked_descendants_count.get(), 308 tracked_descendants_count: self.tracked_descendants_count.get(),
267 }) 309 })
310 }
311 }
312
313 impl Entry {
314 fn from_timestamp(timestamp: Timestamp) -> Self {
315 // Safety: both types implement the `BytesCast` trait, so we could
316 // safely use `as_bytes` and `from_bytes` to do this conversion. Using
317 // `transmute` instead makes the compiler check that the two types
318 // have the same size, which eliminates the error case of
319 // `from_bytes`.
320 unsafe { std::mem::transmute::<Timestamp, Entry>(timestamp) }
321 }
322
323 fn as_timestamp(&self) -> &Timestamp {
324 // Safety: same as above in `from_timestamp`
325 unsafe { &*(self as *const Entry as *const Timestamp) }
326 }
327 }
328
329 impl From<&'_ SystemTime> for Timestamp {
330 fn from(system_time: &'_ SystemTime) -> Self {
331 let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
332 Ok(duration) => {
333 (duration.as_secs() as i64, duration.subsec_nanos())
334 }
335 Err(error) => {
336 let negative = error.duration();
337 (-(negative.as_secs() as i64), negative.subsec_nanos())
338 }
339 };
340 Timestamp {
341 seconds: secs.into(),
342 nanoseconds: nanos.into(),
343 }
344 }
345 }
346
347 impl From<&'_ Timestamp> for SystemTime {
348 fn from(timestamp: &'_ Timestamp) -> Self {
349 let secs = timestamp.seconds.get();
350 let nanos = timestamp.nanoseconds.get();
351 if secs >= 0 {
352 UNIX_EPOCH + Duration::new(secs as u64, nanos)
353 } else {
354 UNIX_EPOCH - Duration::new((-secs) as u64, nanos)
355 }
268 } 356 }
269 } 357 }
270 358
271 fn read_hg_path( 359 fn read_hg_path(
272 on_disk: &[u8], 360 on_disk: &[u8],
354 start: 0.into(), 442 start: 0.into(),
355 len: 0.into(), 443 len: 0.into(),
356 } 444 }
357 }; 445 };
358 on_disk_nodes.push(match node { 446 on_disk_nodes.push(match node {
359 NodeRef::InMemory(path, node) => Node { 447 NodeRef::InMemory(path, node) => {
360 children, 448 let (state, data) = match &node.data {
361 copy_source, 449 dirstate_map::NodeData::Entry(entry) => (
362 full_path, 450 entry.state.into(),
363 base_name_start: u32::try_from(path.base_name_start()) 451 Entry {
364 // Could only panic for paths over 4 GiB 452 mode: entry.mode.into(),
365 .expect("dirstate-v2 offset overflow") 453 mtime: entry.mtime.into(),
366 .into(), 454 size: entry.size.into(),
367 tracked_descendants_count: node 455 },
368 .tracked_descendants_count 456 ),
369 .into(), 457 dirstate_map::NodeData::CachedDirectory { mtime } => {
370 entry: if let Some(entry) = &node.entry { 458 (b'd', Entry::from_timestamp(*mtime))
371 OptEntry {
372 state: entry.state.into(),
373 mode: entry.mode.into(),
374 mtime: entry.mtime.into(),
375 size: entry.size.into(),
376 } 459 }
377 } else { 460 dirstate_map::NodeData::None => (
378 OptEntry { 461 b'\0',
379 state: b'\0', 462 Entry {
380 mode: 0.into(), 463 mode: 0.into(),
381 mtime: 0.into(), 464 mtime: 0.into(),
382 size: 0.into(), 465 size: 0.into(),
383 } 466 },
384 }, 467 ),
385 }, 468 };
469 Node {
470 children,
471 copy_source,
472 full_path,
473 base_name_start: u32::try_from(path.base_name_start())
474 // Could only panic for paths over 4 GiB
475 .expect("dirstate-v2 offset overflow")
476 .into(),
477 tracked_descendants_count: node
478 .tracked_descendants_count
479 .into(),
480 state,
481 data,
482 }
483 }
386 NodeRef::OnDisk(node) => Node { 484 NodeRef::OnDisk(node) => Node {
387 children, 485 children,
388 copy_source, 486 copy_source,
389 full_path, 487 full_path,
390 ..*node 488 ..*node