diff rust/hg-core/src/dirstate_tree/on_disk.rs @ 47349:7138c863d0a1

dirstate-v2: Skip readdir in status based on directory mtime When calling `read_dir` during `status` and the directory is found to be eligible for caching (see code comments), write the directory’s mtime to the dirstate. The presence of a directory mtime in the dirstate is meaningful and indicates eligibility. When an eligible directory mtime is found in the dirstate and `stat()` shows that the mtime has not changed, `status` can skip calling `read_dir` again and instead rely on the names of child nodes in the dirstate tree. The `tempfile` crate is used to create a temporary file in order to use its modification time as "current time" with the same truncation as other files and directories would have in their own modification time. Differential Revision: https://phab.mercurial-scm.org/D10826
author Simon Sapin <simon.sapin@octobus.net>
date Fri, 28 May 2021 11:48:59 +0200
parents a4de570e61fa
children 3b9914b28133
line wrap: on
line diff
--- a/rust/hg-core/src/dirstate_tree/on_disk.rs	Thu May 27 18:40:54 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Fri May 28 11:48:59 2021 +0200
@@ -56,13 +56,31 @@
 
     /// Dependending on the value of `state`:
     ///
-    /// * A null byte: `data` represents nothing
+    /// * A null byte: `data` is not used.
+    ///
     /// * A `n`, `a`, `r`, or `m` ASCII byte: `state` and `data` together
-    ///   represents a dirstate entry like in the v1 format.
+    ///   represent a dirstate entry like in the v1 format.
+    ///
     /// * A `d` ASCII byte: the bytes of `data` should instead be interpreted
     ///   as the `Timestamp` for the mtime of a cached directory.
     ///
-    /// TODO: document directory caching
+    ///   The presence of this state means that at some point, this path in
+    ///   the working directory was observed:
+    ///
+    ///   - To be a directory
+    ///   - With the modification time as given by `Timestamp`
+    ///   - That timestamp was already strictly in the past when observed,
+    ///     meaning that later changes cannot happen in the same clock tick
+    ///     and must cause a different modification time (unless the system
+    ///     clock jumps back and we get unlucky, which is not impossible but
+    ///     deemed unlikely enough).
+    ///   - The directory did not contain any child entry that did not have a
+    ///     corresponding dirstate node.
+    ///
+    ///   This means that if `std::fs::symlink_metadata` later reports the
+    ///   same modification time, we don’t need to call `std::fs::read_dir`
+    ///   again for this directory and can iterate child dirstate nodes
+    ///   instead.
     state: u8,
     data: Entry,
 }
@@ -76,7 +94,7 @@
 }
 
 /// Duration since the Unix epoch
-#[derive(BytesCast, Copy, Clone)]
+#[derive(BytesCast, Copy, Clone, PartialEq)]
 #[repr(C)]
 pub(super) struct Timestamp {
     seconds: I64Be,
@@ -258,6 +276,14 @@
         }
     }
 
+    pub(super) fn cached_directory_mtime(&self) -> Option<&Timestamp> {
+        if self.state == b'd' {
+            Some(self.data.as_timestamp())
+        } else {
+            None
+        }
+    }
+
     pub(super) fn state(
         &self,
     ) -> Result<Option<EntryState>, DirstateV2ParseError> {
@@ -326,8 +352,8 @@
     }
 }
 
-impl From<&'_ SystemTime> for Timestamp {
-    fn from(system_time: &'_ SystemTime) -> Self {
+impl From<SystemTime> for Timestamp {
+    fn from(system_time: SystemTime) -> Self {
         let (secs, nanos) = match system_time.duration_since(UNIX_EPOCH) {
             Ok(duration) => {
                 (duration.as_secs() as i64, duration.subsec_nanos())