dirstate-v2: Change the on-disk format when the requirement is enabled
author Simon Sapin <simon.sapin@octobus.net>
Wed, 19 May 2021 13:15:00 +0200
changeset 47280 1766130fe9ba
parent 47279 40b51c28b242
child 47281 6763913fa175
dirstate-v2: Change the on-disk format when the requirement is enabled

For now, the format is the same except with an additional marker at the start. This marker is redundant: for existing repositories it is `.hg/requires` that determines which format to use. For new repositories, it is the new `format.exp-dirstate-v2` config. There is no upgrade or downgrade so far.

Most of the changes are about plumbing a boolean through layers of APIs to indicate which format should be used.

Differential Revision: https://phab.mercurial-scm.org/D10719
hgext/largefiles/lfutil.py
mercurial/dirstate.py
mercurial/interfaces/dirstate.py
mercurial/localrepo.py
rust/hg-core/src/dirstate_tree.rs
rust/hg-core/src/dirstate_tree/dirstate_map.rs
rust/hg-core/src/dirstate_tree/dispatch.rs
rust/hg-core/src/dirstate_tree/on_disk.rs
rust/hg-cpython/src/dirstate.rs
rust/hg-cpython/src/dirstate/dirstate_map.rs
rust/hg-cpython/src/dirstate/dispatch.rs
rust/hg-cpython/src/dirstate/owning.rs
--- a/hgext/largefiles/lfutil.py	Wed May 19 13:15:00 2021 +0200
+++ b/hgext/largefiles/lfutil.py	Wed May 19 13:15:00 2021 +0200
@@ -25,6 +25,7 @@
     httpconnection,
     match as matchmod,
     pycompat,
+    requirements,
     scmutil,
     sparse,
     util,
@@ -197,6 +198,7 @@
     vfs = repo.vfs
     lfstoredir = longname
     opener = vfsmod.vfs(vfs.join(lfstoredir))
+    use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
     lfdirstate = largefilesdirstate(
         opener,
         ui,
@@ -204,6 +206,7 @@
         repo.dirstate._validate,
         lambda: sparse.matcher(repo),
         repo.nodeconstants,
+        use_dirstate_v2,
     )
 
     # If the largefiles dirstate does not exist, populate and create
--- a/mercurial/dirstate.py	Wed May 19 13:15:00 2021 +0200
+++ b/mercurial/dirstate.py	Wed May 19 13:15:00 2021 +0200
@@ -75,7 +75,14 @@
 @interfaceutil.implementer(intdirstate.idirstate)
 class dirstate(object):
     def __init__(
-        self, opener, ui, root, validate, sparsematchfn, nodeconstants
+        self,
+        opener,
+        ui,
+        root,
+        validate,
+        sparsematchfn,
+        nodeconstants,
+        use_dirstate_v2,
     ):
         """Create a new dirstate object.
 
@@ -83,6 +90,7 @@
         dirstate file; root is the root of the directory tracked by
         the dirstate.
         """
+        self._use_dirstate_v2 = use_dirstate_v2
         self._nodeconstants = nodeconstants
         self._opener = opener
         self._validate = validate
@@ -141,7 +149,11 @@
     def _map(self):
         """Return the dirstate contents (see documentation for dirstatemap)."""
         self._map = self._mapcls(
-            self._ui, self._opener, self._root, self._nodeconstants
+            self._ui,
+            self._opener,
+            self._root,
+            self._nodeconstants,
+            self._use_dirstate_v2,
         )
         return self._map
 
@@ -1435,13 +1447,16 @@
       denormalized form that they appear as in the dirstate.
     """
 
-    def __init__(self, ui, opener, root, nodeconstants):
+    def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
         self._ui = ui
         self._opener = opener
         self._root = root
         self._filename = b'dirstate'
         self._nodelen = 20
         self._nodeconstants = nodeconstants
+        assert (
+            not use_dirstate_v2
+        ), "should have detected unsupported requirement"
 
         self._parents = None
         self._dirtyparents = False
@@ -1746,13 +1761,14 @@
 if rustmod is not None:
 
     class dirstatemap(object):
-        def __init__(self, ui, opener, root, nodeconstants):
+        def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
+            self._use_dirstate_v2 = use_dirstate_v2
             self._nodeconstants = nodeconstants
             self._ui = ui
             self._opener = opener
             self._root = root
             self._filename = b'dirstate'
-            self._nodelen = 20
+            self._nodelen = 20  # Also update Rust code when changing this!
             self._parents = None
             self._dirtyparents = False
 
@@ -1832,9 +1848,14 @@
 
         def parents(self):
             if not self._parents:
+                if self._use_dirstate_v2:
+                    offset = len(rustmod.V2_FORMAT_MARKER)
+                else:
+                    offset = 0
+                read_len = offset + self._nodelen * 2
                 try:
                     fp = self._opendirstatefile()
-                    st = fp.read(40)
+                    st = fp.read(read_len)
                     fp.close()
                 except IOError as err:
                     if err.errno != errno.ENOENT:
@@ -1843,7 +1864,8 @@
                     st = b''
 
                 l = len(st)
-                if l == self._nodelen * 2:
+                if l == read_len:
+                    st = st[offset:]
                     self._parents = (
                         st[: self._nodelen],
                         st[self._nodelen : 2 * self._nodelen],
@@ -1887,7 +1909,7 @@
                 False,
             )
             self._rustmap, parents = rustmod.DirstateMap.new(
-                use_dirstate_tree, st
+                use_dirstate_tree, self._use_dirstate_v2, st
             )
 
             if parents and not self._dirtyparents:
@@ -1900,7 +1922,10 @@
 
         def write(self, st, now):
             parents = self.parents()
-            st.write(self._rustmap.write(parents[0], parents[1], now))
+            packed = self._rustmap.write(
+                self._use_dirstate_v2, parents[0], parents[1], now
+            )
+            st.write(packed)
             st.close()
             self._dirtyparents = False
 
--- a/mercurial/interfaces/dirstate.py	Wed May 19 13:15:00 2021 +0200
+++ b/mercurial/interfaces/dirstate.py	Wed May 19 13:15:00 2021 +0200
@@ -6,7 +6,15 @@
 
 
 class idirstate(interfaceutil.Interface):
-    def __init__(opener, ui, root, validate, sparsematchfn, nodeconstants):
+    def __init__(
+        opener,
+        ui,
+        root,
+        validate,
+        sparsematchfn,
+        nodeconstants,
+        use_dirstate_v2,
+    ):
         """Create a new dirstate object.
 
         opener is an open()-like callable that can be used to open the
--- a/mercurial/localrepo.py	Wed May 19 13:15:00 2021 +0200
+++ b/mercurial/localrepo.py	Wed May 19 13:15:00 2021 +0200
@@ -1690,6 +1690,8 @@
     def _makedirstate(self):
         """Extension point for wrapping the dirstate per-repo."""
         sparsematchfn = lambda: sparse.matcher(self)
+        v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT
+        use_dirstate_v2 = v2_req in self.requirements
 
         return dirstate.dirstate(
             self.vfs,
@@ -1698,6 +1700,7 @@
             self._dirstatevalidate,
             sparsematchfn,
             self.nodeconstants,
+            use_dirstate_v2,
         )
 
     def _dirstatevalidate(self, node):
--- a/rust/hg-core/src/dirstate_tree.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree.rs	Wed May 19 13:15:00 2021 +0200
@@ -1,4 +1,5 @@
 pub mod dirstate_map;
 pub mod dispatch;
+pub mod on_disk;
 pub mod path_with_basename;
 mod status;
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Wed May 19 13:15:00 2021 +0200
@@ -4,14 +4,17 @@
 use std::convert::TryInto;
 use std::path::PathBuf;
 
+use super::on_disk::V2_FORMAT_MARKER;
 use super::path_with_basename::WithBasename;
 use crate::dirstate::parsers::clear_ambiguous_mtime;
 use crate::dirstate::parsers::pack_entry;
 use crate::dirstate::parsers::packed_entry_size;
 use crate::dirstate::parsers::parse_dirstate_entries;
 use crate::dirstate::parsers::Timestamp;
+use crate::errors::HgError;
 use crate::matchers::Matcher;
 use crate::utils::hg_path::{HgPath, HgPathBuf};
+use crate::utils::SliceExt;
 use crate::CopyMapIter;
 use crate::DirstateEntry;
 use crate::DirstateError;
@@ -75,7 +78,24 @@
 );
 
 impl<'on_disk> DirstateMap<'on_disk> {
-    pub fn new(
+    #[timed]
+    pub fn new_v2(
+        on_disk: &'on_disk [u8],
+    ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
+        if let Some(rest) = on_disk.drop_prefix(V2_FORMAT_MARKER) {
+            Self::new_v1(rest)
+        } else if on_disk.is_empty() {
+            Self::new_v1(on_disk)
+        } else {
+            return Err(HgError::corrupted(
+                "missing dirstate-v2 magic number",
+            )
+            .into());
+        }
+    }
+
+    #[timed]
+    pub fn new_v1(
         on_disk: &'on_disk [u8],
     ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
         let mut map = Self {
@@ -84,23 +104,16 @@
             nodes_with_entry_count: 0,
             nodes_with_copy_source_count: 0,
         };
-        let parents = map.read()?;
-        Ok((map, parents))
-    }
-
-    /// Should only be called in `new`
-    #[timed]
-    fn read(&mut self) -> Result<Option<DirstateParents>, DirstateError> {
-        if self.on_disk.is_empty() {
-            return Ok(None);
+        if map.on_disk.is_empty() {
+            return Ok((map, None));
         }
 
         let parents = parse_dirstate_entries(
-            self.on_disk,
+            map.on_disk,
             |path, entry, copy_source| {
                 let tracked = entry.state.is_tracked();
                 let node = Self::get_or_insert_node(
-                    &mut self.root,
+                    &mut map.root,
                     path,
                     WithBasename::to_cow_borrowed,
                     |ancestor| {
@@ -119,14 +132,15 @@
                 );
                 node.entry = Some(*entry);
                 node.copy_source = copy_source.map(Cow::Borrowed);
-                self.nodes_with_entry_count += 1;
+                map.nodes_with_entry_count += 1;
                 if copy_source.is_some() {
-                    self.nodes_with_copy_source_count += 1
+                    map.nodes_with_copy_source_count += 1
                 }
             },
         )?;
+        let parents = Some(parents.clone());
 
-        Ok(Some(parents.clone()))
+        Ok((map, parents))
     }
 
     fn get_node(&self, path: &HgPath) -> Option<&Node> {
@@ -498,7 +512,8 @@
         }
     }
 
-    fn pack(
+    #[timed]
+    fn pack_v1(
         &mut self,
         parents: DirstateParents,
         now: Timestamp,
@@ -533,6 +548,18 @@
         Ok(packed)
     }
 
+    #[timed]
+    fn pack_v2(
+        &mut self,
+        parents: DirstateParents,
+        now: Timestamp,
+    ) -> Result<Vec<u8>, DirstateError> {
+        // Inefficient but temporary
+        let mut v2 = V2_FORMAT_MARKER.to_vec();
+        v2.append(&mut self.pack_v1(parents, now)?);
+        Ok(v2)
+    }
+
     fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
         // Do nothing, this `DirstateMap` does not a separate `all_dirs` that
         // needs to be recomputed
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs	Wed May 19 13:15:00 2021 +0200
@@ -73,7 +73,13 @@
         directory: &HgPath,
     ) -> Result<bool, DirstateMapError>;
 
-    fn pack(
+    fn pack_v1(
+        &mut self,
+        parents: DirstateParents,
+        now: Timestamp,
+    ) -> Result<Vec<u8>, DirstateError>;
+
+    fn pack_v2(
         &mut self,
         parents: DirstateParents,
         now: Timestamp,
@@ -211,7 +217,7 @@
         self.has_dir(directory)
     }
 
-    fn pack(
+    fn pack_v1(
         &mut self,
         parents: DirstateParents,
         now: Timestamp,
@@ -219,6 +225,16 @@
         self.pack(parents, now)
     }
 
+    fn pack_v2(
+        &mut self,
+        _parents: DirstateParents,
+        _now: Timestamp,
+    ) -> Result<Vec<u8>, DirstateError> {
+        panic!(
+            "should have used dirstate_tree::DirstateMap to use the v2 format"
+        )
+    }
+
     fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
         self.set_all_dirs()
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs	Wed May 19 13:15:00 2021 +0200
@@ -0,0 +1,4 @@
+/// Added at the start of `.hg/dirstate` when the "v2" format is used.
+/// Acts like a "magic number". This is a sanity check, not strictly necessary
+/// since `.hg/requires` already governs which format should be used.
+pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
--- a/rust/hg-cpython/src/dirstate.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate.rs	Wed May 19 13:15:00 2021 +0200
@@ -26,6 +26,7 @@
     exc, PyBytes, PyDict, PyErr, PyList, PyModule, PyObject, PyResult,
     PySequence, Python,
 };
+use hg::dirstate_tree::on_disk::V2_FORMAT_MARKER;
 use hg::{utils::hg_path::HgPathBuf, DirstateEntry, EntryState, StateMap};
 use libc::{c_char, c_int};
 use std::convert::TryFrom;
@@ -117,6 +118,7 @@
     )?;
     m.add_class::<Dirs>(py)?;
     m.add_class::<DirstateMap>(py)?;
+    m.add(py, "V2_FORMAT_MARKER", PyBytes::new(py, V2_FORMAT_MARKER))?;
     m.add(
         py,
         "status",
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Wed May 19 13:15:00 2021 +0200
@@ -55,13 +55,17 @@
 
     /// Returns a `(dirstate_map, parents)` tuple
     @staticmethod
-    def new(use_dirstate_tree: bool, on_disk: PyBytes) -> PyResult<PyObject> {
-        let dirstate_error = |_: DirstateError| {
-            PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string())
+    def new(
+        use_dirstate_tree: bool,
+        use_dirstate_v2: bool,
+        on_disk: PyBytes,
+    ) -> PyResult<PyObject> {
+        let dirstate_error = |e: DirstateError| {
+            PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
         };
-        let (inner, parents) = if use_dirstate_tree {
+        let (inner, parents) = if use_dirstate_tree || use_dirstate_v2 {
             let (map, parents) =
-                OwningDirstateMap::new(py, on_disk)
+                OwningDirstateMap::new(py, on_disk, use_dirstate_v2)
                 .map_err(dirstate_error)?;
             (Box::new(map) as _, parents)
         } else {
@@ -288,6 +292,7 @@
 
     def write(
         &self,
+        use_dirstate_v2: bool,
         p1: PyObject,
         p2: PyObject,
         now: PyObject
@@ -298,7 +303,13 @@
             p2: extract_node_id(py, &p2)?,
         };
 
-        match self.inner(py).borrow_mut().pack(parents, now) {
+        let mut inner = self.inner(py).borrow_mut();
+        let result = if use_dirstate_v2 {
+            inner.pack_v2(parents, now)
+        } else {
+            inner.pack_v1(parents, now)
+        };
+        match result {
             Ok(packed) => Ok(PyBytes::new(py, &packed)),
             Err(_) => Err(PyErr::new::<exc::OSError, _>(
                 py,
--- a/rust/hg-cpython/src/dirstate/dispatch.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs	Wed May 19 13:15:00 2021 +0200
@@ -101,12 +101,20 @@
         self.get_mut().has_dir(directory)
     }
 
-    fn pack(
+    fn pack_v1(
         &mut self,
         parents: DirstateParents,
         now: Timestamp,
     ) -> Result<Vec<u8>, DirstateError> {
-        self.get_mut().pack(parents, now)
+        self.get_mut().pack_v1(parents, now)
+    }
+
+    fn pack_v2(
+        &mut self,
+        parents: DirstateParents,
+        now: Timestamp,
+    ) -> Result<Vec<u8>, DirstateError> {
+        self.get_mut().pack_v2(parents, now)
     }
 
     fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
--- a/rust/hg-cpython/src/dirstate/owning.rs	Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/owning.rs	Wed May 19 13:15:00 2021 +0200
@@ -31,9 +31,14 @@
     pub fn new(
         py: Python,
         on_disk: PyBytes,
+        use_dirstate_v2: bool,
     ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
         let bytes: &'_ [u8] = on_disk.data(py);
-        let (map, parents) = DirstateMap::new(bytes)?;
+        let (map, parents) = if use_dirstate_v2 {
+            DirstateMap::new_v2(bytes)?
+        } else {
+            DirstateMap::new_v1(bytes)?
+        };
 
         // Like in `bytes` above, this `'_` lifetime parameter borrows from
         // the bytes buffer owned by `on_disk`.