Mercurial > hg
changeset 47280:1766130fe9ba
dirstate-v2: Change the on-disk format when the requirement is enabled
For now, the format is the same except with an additional marker at the start.
This marker is redundant: for existing repositories it is `.hg/requires` that
determines which format to use. For new repositories, it is the new
`format.exp-dirstate-v2` config. There is no upgrade or downgrade so far.
Most of the changes are about plumbing a boolean through layers of APIs to
indicate which format should be used.
Differential Revision: https://phab.mercurial-scm.org/D10719
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Wed, 19 May 2021 13:15:00 +0200 |
parents | 40b51c28b242 |
children | 6763913fa175 |
files | hgext/largefiles/lfutil.py mercurial/dirstate.py mercurial/interfaces/dirstate.py mercurial/localrepo.py rust/hg-core/src/dirstate_tree.rs rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/dirstate_tree/dispatch.rs rust/hg-core/src/dirstate_tree/on_disk.rs rust/hg-cpython/src/dirstate.rs rust/hg-cpython/src/dirstate/dirstate_map.rs rust/hg-cpython/src/dirstate/dispatch.rs rust/hg-cpython/src/dirstate/owning.rs |
diffstat | 12 files changed, 150 insertions(+), 37 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/largefiles/lfutil.py Wed May 19 13:15:00 2021 +0200 +++ b/hgext/largefiles/lfutil.py Wed May 19 13:15:00 2021 +0200 @@ -25,6 +25,7 @@ httpconnection, match as matchmod, pycompat, + requirements, scmutil, sparse, util, @@ -197,6 +198,7 @@ vfs = repo.vfs lfstoredir = longname opener = vfsmod.vfs(vfs.join(lfstoredir)) + use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements lfdirstate = largefilesdirstate( opener, ui, @@ -204,6 +206,7 @@ repo.dirstate._validate, lambda: sparse.matcher(repo), repo.nodeconstants, + use_dirstate_v2, ) # If the largefiles dirstate does not exist, populate and create
--- a/mercurial/dirstate.py Wed May 19 13:15:00 2021 +0200 +++ b/mercurial/dirstate.py Wed May 19 13:15:00 2021 +0200 @@ -75,7 +75,14 @@ @interfaceutil.implementer(intdirstate.idirstate) class dirstate(object): def __init__( - self, opener, ui, root, validate, sparsematchfn, nodeconstants + self, + opener, + ui, + root, + validate, + sparsematchfn, + nodeconstants, + use_dirstate_v2, ): """Create a new dirstate object. @@ -83,6 +90,7 @@ dirstate file; root is the root of the directory tracked by the dirstate. """ + self._use_dirstate_v2 = use_dirstate_v2 self._nodeconstants = nodeconstants self._opener = opener self._validate = validate @@ -141,7 +149,11 @@ def _map(self): """Return the dirstate contents (see documentation for dirstatemap).""" self._map = self._mapcls( - self._ui, self._opener, self._root, self._nodeconstants + self._ui, + self._opener, + self._root, + self._nodeconstants, + self._use_dirstate_v2, ) return self._map @@ -1435,13 +1447,16 @@ denormalized form that they appear as in the dirstate. """ - def __init__(self, ui, opener, root, nodeconstants): + def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2): self._ui = ui self._opener = opener self._root = root self._filename = b'dirstate' self._nodelen = 20 self._nodeconstants = nodeconstants + assert ( + not use_dirstate_v2 + ), "should have detected unsupported requirement" self._parents = None self._dirtyparents = False @@ -1746,13 +1761,14 @@ if rustmod is not None: class dirstatemap(object): - def __init__(self, ui, opener, root, nodeconstants): + def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2): + self._use_dirstate_v2 = use_dirstate_v2 self._nodeconstants = nodeconstants self._ui = ui self._opener = opener self._root = root self._filename = b'dirstate' - self._nodelen = 20 + self._nodelen = 20 # Also update Rust code when changing this! self._parents = None self._dirtyparents = False @@ -1832,9 +1848,14 @@ def parents(self): if not self._parents: + if self._use_dirstate_v2: + offset = len(rustmod.V2_FORMAT_MARKER) + else: + offset = 0 + read_len = offset + self._nodelen * 2 try: fp = self._opendirstatefile() - st = fp.read(40) + st = fp.read(read_len) fp.close() except IOError as err: if err.errno != errno.ENOENT: @@ -1843,7 +1864,8 @@ st = b'' l = len(st) - if l == self._nodelen * 2: + if l == read_len: + st = st[offset:] self._parents = ( st[: self._nodelen], st[self._nodelen : 2 * self._nodelen], @@ -1887,7 +1909,7 @@ False, ) self._rustmap, parents = rustmod.DirstateMap.new( - use_dirstate_tree, st + use_dirstate_tree, self._use_dirstate_v2, st ) if parents and not self._dirtyparents: @@ -1900,7 +1922,10 @@ def write(self, st, now): parents = self.parents() - st.write(self._rustmap.write(parents[0], parents[1], now)) + packed = self._rustmap.write( + self._use_dirstate_v2, parents[0], parents[1], now + ) + st.write(packed) st.close() self._dirtyparents = False
--- a/mercurial/interfaces/dirstate.py Wed May 19 13:15:00 2021 +0200 +++ b/mercurial/interfaces/dirstate.py Wed May 19 13:15:00 2021 +0200 @@ -6,7 +6,15 @@ class idirstate(interfaceutil.Interface): - def __init__(opener, ui, root, validate, sparsematchfn, nodeconstants): + def __init__( + opener, + ui, + root, + validate, + sparsematchfn, + nodeconstants, + use_dirstate_v2, + ): """Create a new dirstate object. opener is an open()-like callable that can be used to open the
--- a/mercurial/localrepo.py Wed May 19 13:15:00 2021 +0200 +++ b/mercurial/localrepo.py Wed May 19 13:15:00 2021 +0200 @@ -1690,6 +1690,8 @@ def _makedirstate(self): """Extension point for wrapping the dirstate per-repo.""" sparsematchfn = lambda: sparse.matcher(self) + v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT + use_dirstate_v2 = v2_req in self.requirements return dirstate.dirstate( self.vfs, @@ -1698,6 +1700,7 @@ self._dirstatevalidate, sparsematchfn, self.nodeconstants, + use_dirstate_v2, ) def _dirstatevalidate(self, node):
--- a/rust/hg-core/src/dirstate_tree.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree.rs Wed May 19 13:15:00 2021 +0200 @@ -1,4 +1,5 @@ pub mod dirstate_map; pub mod dispatch; +pub mod on_disk; pub mod path_with_basename; mod status;
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -4,14 +4,17 @@ use std::convert::TryInto; use std::path::PathBuf; +use super::on_disk::V2_FORMAT_MARKER; use super::path_with_basename::WithBasename; use crate::dirstate::parsers::clear_ambiguous_mtime; use crate::dirstate::parsers::pack_entry; use crate::dirstate::parsers::packed_entry_size; use crate::dirstate::parsers::parse_dirstate_entries; use crate::dirstate::parsers::Timestamp; +use crate::errors::HgError; use crate::matchers::Matcher; use crate::utils::hg_path::{HgPath, HgPathBuf}; +use crate::utils::SliceExt; use crate::CopyMapIter; use crate::DirstateEntry; use crate::DirstateError; @@ -75,7 +78,24 @@ ); impl<'on_disk> DirstateMap<'on_disk> { - pub fn new( + #[timed] + pub fn new_v2( + on_disk: &'on_disk [u8], + ) -> Result<(Self, Option<DirstateParents>), DirstateError> { + if let Some(rest) = on_disk.drop_prefix(V2_FORMAT_MARKER) { + Self::new_v1(rest) + } else if on_disk.is_empty() { + Self::new_v1(on_disk) + } else { + return Err(HgError::corrupted( + "missing dirstate-v2 magic number", + ) + .into()); + } + } + + #[timed] + pub fn new_v1( on_disk: &'on_disk [u8], ) -> Result<(Self, Option<DirstateParents>), DirstateError> { let mut map = Self { @@ -84,23 +104,16 @@ nodes_with_entry_count: 0, nodes_with_copy_source_count: 0, }; - let parents = map.read()?; - Ok((map, parents)) - } - - /// Should only be called in `new` - #[timed] - fn read(&mut self) -> Result<Option<DirstateParents>, DirstateError> { - if self.on_disk.is_empty() { - return Ok(None); + if map.on_disk.is_empty() { + return Ok((map, None)); } let parents = parse_dirstate_entries( - self.on_disk, + map.on_disk, |path, entry, copy_source| { let tracked = entry.state.is_tracked(); let node = Self::get_or_insert_node( - &mut self.root, + &mut map.root, path, WithBasename::to_cow_borrowed, |ancestor| { @@ -119,14 +132,15 @@ ); node.entry = Some(*entry); node.copy_source = copy_source.map(Cow::Borrowed); - self.nodes_with_entry_count += 1; + map.nodes_with_entry_count += 1; if copy_source.is_some() { - self.nodes_with_copy_source_count += 1 + map.nodes_with_copy_source_count += 1 } }, )?; + let parents = Some(parents.clone()); - Ok(Some(parents.clone())) + Ok((map, parents)) } fn get_node(&self, path: &HgPath) -> Option<&Node> { @@ -498,7 +512,8 @@ } } - fn pack( + #[timed] + fn pack_v1( &mut self, parents: DirstateParents, now: Timestamp, @@ -533,6 +548,18 @@ Ok(packed) } + #[timed] + fn pack_v2( + &mut self, + parents: DirstateParents, + now: Timestamp, + ) -> Result<Vec<u8>, DirstateError> { + // Inefficient but temporary + let mut v2 = V2_FORMAT_MARKER.to_vec(); + v2.append(&mut self.pack_v1(parents, now)?); + Ok(v2) + } + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { // Do nothing, this `DirstateMap` does not a separate `all_dirs` that // needs to be recomputed
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-core/src/dirstate_tree/dispatch.rs Wed May 19 13:15:00 2021 +0200 @@ -73,7 +73,13 @@ directory: &HgPath, ) -> Result<bool, DirstateMapError>; - fn pack( + fn pack_v1( + &mut self, + parents: DirstateParents, + now: Timestamp, + ) -> Result<Vec<u8>, DirstateError>; + + fn pack_v2( &mut self, parents: DirstateParents, now: Timestamp, @@ -211,7 +217,7 @@ self.has_dir(directory) } - fn pack( + fn pack_v1( &mut self, parents: DirstateParents, now: Timestamp, @@ -219,6 +225,16 @@ self.pack(parents, now) } + fn pack_v2( + &mut self, + _parents: DirstateParents, + _now: Timestamp, + ) -> Result<Vec<u8>, DirstateError> { + panic!( + "should have used dirstate_tree::DirstateMap to use the v2 format" + ) + } + fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> { self.set_all_dirs() }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Wed May 19 13:15:00 2021 +0200 @@ -0,0 +1,4 @@ +/// Added at the start of `.hg/dirstate` when the "v2" format is used. +/// Acts like a "magic number". This is a sanity check, not strictly necessary +/// since `.hg/requires` already governs which format should be used. +pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
--- a/rust/hg-cpython/src/dirstate.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate.rs Wed May 19 13:15:00 2021 +0200 @@ -26,6 +26,7 @@ exc, PyBytes, PyDict, PyErr, PyList, PyModule, PyObject, PyResult, PySequence, Python, }; +use hg::dirstate_tree::on_disk::V2_FORMAT_MARKER; use hg::{utils::hg_path::HgPathBuf, DirstateEntry, EntryState, StateMap}; use libc::{c_char, c_int}; use std::convert::TryFrom; @@ -117,6 +118,7 @@ )?; m.add_class::<Dirs>(py)?; m.add_class::<DirstateMap>(py)?; + m.add(py, "V2_FORMAT_MARKER", PyBytes::new(py, V2_FORMAT_MARKER))?; m.add( py, "status",
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200 @@ -55,13 +55,17 @@ /// Returns a `(dirstate_map, parents)` tuple @staticmethod - def new(use_dirstate_tree: bool, on_disk: PyBytes) -> PyResult<PyObject> { - let dirstate_error = |_: DirstateError| { - PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string()) + def new( + use_dirstate_tree: bool, + use_dirstate_v2: bool, + on_disk: PyBytes, + ) -> PyResult<PyObject> { + let dirstate_error = |e: DirstateError| { + PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e)) }; - let (inner, parents) = if use_dirstate_tree { + let (inner, parents) = if use_dirstate_tree || use_dirstate_v2 { let (map, parents) = - OwningDirstateMap::new(py, on_disk) + OwningDirstateMap::new(py, on_disk, use_dirstate_v2) .map_err(dirstate_error)?; (Box::new(map) as _, parents) } else { @@ -288,6 +292,7 @@ def write( &self, + use_dirstate_v2: bool, p1: PyObject, p2: PyObject, now: PyObject @@ -298,7 +303,13 @@ p2: extract_node_id(py, &p2)?, }; - match self.inner(py).borrow_mut().pack(parents, now) { + let mut inner = self.inner(py).borrow_mut(); + let result = if use_dirstate_v2 { + inner.pack_v2(parents, now) + } else { + inner.pack_v1(parents, now) + }; + match result { Ok(packed) => Ok(PyBytes::new(py, &packed)), Err(_) => Err(PyErr::new::<exc::OSError, _>( py,
--- a/rust/hg-cpython/src/dirstate/dispatch.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/dispatch.rs Wed May 19 13:15:00 2021 +0200 @@ -101,12 +101,20 @@ self.get_mut().has_dir(directory) } - fn pack( + fn pack_v1( &mut self, parents: DirstateParents, now: Timestamp, ) -> Result<Vec<u8>, DirstateError> { - self.get_mut().pack(parents, now) + self.get_mut().pack_v1(parents, now) + } + + fn pack_v2( + &mut self, + parents: DirstateParents, + now: Timestamp, + ) -> Result<Vec<u8>, DirstateError> { + self.get_mut().pack_v2(parents, now) } fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
--- a/rust/hg-cpython/src/dirstate/owning.rs Wed May 19 13:15:00 2021 +0200 +++ b/rust/hg-cpython/src/dirstate/owning.rs Wed May 19 13:15:00 2021 +0200 @@ -31,9 +31,14 @@ pub fn new( py: Python, on_disk: PyBytes, + use_dirstate_v2: bool, ) -> Result<(Self, Option<DirstateParents>), DirstateError> { let bytes: &'_ [u8] = on_disk.data(py); - let (map, parents) = DirstateMap::new(bytes)?; + let (map, parents) = if use_dirstate_v2 { + DirstateMap::new_v2(bytes)? + } else { + DirstateMap::new_v1(bytes)? + }; // Like in `bytes` above, this `'_` lifetime parameter borrows from // the bytes buffer owned by `on_disk`.