dirstate-v2: Change the on-disk format when the requirement is enabled
For now, the v2 format is the same as v1 except for an additional marker
(`dirstate-v2\n`) at the start of the file. This marker is redundant: for
existing repositories, `.hg/requires` determines which format to use; for new
repositories, the new `format.exp-dirstate-v2` config does. There is no
upgrade or downgrade path between the two formats so far.
Most of the changes are about plumbing a boolean through layers of APIs to
indicate which format should be used.
Differential Revision: https://phab.mercurial-scm.org/D10719
--- a/hgext/largefiles/lfutil.py Wed May 19 13:15:00 2021 +0200
+++ b/hgext/largefiles/lfutil.py Wed May 19 13:15:00 2021 +0200
@@ -25,6 +25,7 @@
httpconnection,
match as matchmod,
pycompat,
+ requirements,
scmutil,
sparse,
util,
@@ -197,6 +198,7 @@
vfs = repo.vfs
lfstoredir = longname
opener = vfsmod.vfs(vfs.join(lfstoredir))
+ use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
lfdirstate = largefilesdirstate(
opener,
ui,
@@ -204,6 +206,7 @@
repo.dirstate._validate,
lambda: sparse.matcher(repo),
repo.nodeconstants,
+ use_dirstate_v2,
)
# If the largefiles dirstate does not exist, populate and create
--- a/mercurial/dirstate.py Wed May 19 13:15:00 2021 +0200
+++ b/mercurial/dirstate.py Wed May 19 13:15:00 2021 +0200
@@ -75,7 +75,14 @@
@interfaceutil.implementer(intdirstate.idirstate)
class dirstate(object):
def __init__(
- self, opener, ui, root, validate, sparsematchfn, nodeconstants
+ self,
+ opener,
+ ui,
+ root,
+ validate,
+ sparsematchfn,
+ nodeconstants,
+ use_dirstate_v2,
):
"""Create a new dirstate object.
@@ -83,6 +90,7 @@
dirstate file; root is the root of the directory tracked by
the dirstate.
"""
+ self._use_dirstate_v2 = use_dirstate_v2
self._nodeconstants = nodeconstants
self._opener = opener
self._validate = validate
@@ -141,7 +149,11 @@
def _map(self):
"""Return the dirstate contents (see documentation for dirstatemap)."""
self._map = self._mapcls(
- self._ui, self._opener, self._root, self._nodeconstants
+ self._ui,
+ self._opener,
+ self._root,
+ self._nodeconstants,
+ self._use_dirstate_v2,
)
return self._map
@@ -1435,13 +1447,16 @@
denormalized form that they appear as in the dirstate.
"""
- def __init__(self, ui, opener, root, nodeconstants):
+ def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
self._ui = ui
self._opener = opener
self._root = root
self._filename = b'dirstate'
self._nodelen = 20
self._nodeconstants = nodeconstants
+ assert (
+ not use_dirstate_v2
+ ), "should have detected unsupported requirement"
self._parents = None
self._dirtyparents = False
@@ -1746,13 +1761,14 @@
if rustmod is not None:
class dirstatemap(object):
- def __init__(self, ui, opener, root, nodeconstants):
+ def __init__(self, ui, opener, root, nodeconstants, use_dirstate_v2):
+ self._use_dirstate_v2 = use_dirstate_v2
self._nodeconstants = nodeconstants
self._ui = ui
self._opener = opener
self._root = root
self._filename = b'dirstate'
- self._nodelen = 20
+ self._nodelen = 20 # Also update Rust code when changing this!
self._parents = None
self._dirtyparents = False
@@ -1832,9 +1848,14 @@
def parents(self):
if not self._parents:
+ if self._use_dirstate_v2:
+ offset = len(rustmod.V2_FORMAT_MARKER)
+ else:
+ offset = 0
+ read_len = offset + self._nodelen * 2
try:
fp = self._opendirstatefile()
- st = fp.read(40)
+ st = fp.read(read_len)
fp.close()
except IOError as err:
if err.errno != errno.ENOENT:
@@ -1843,7 +1864,8 @@
st = b''
l = len(st)
- if l == self._nodelen * 2:
+ if l == read_len:
+ st = st[offset:]
self._parents = (
st[: self._nodelen],
st[self._nodelen : 2 * self._nodelen],
@@ -1887,7 +1909,7 @@
False,
)
self._rustmap, parents = rustmod.DirstateMap.new(
- use_dirstate_tree, st
+ use_dirstate_tree, self._use_dirstate_v2, st
)
if parents and not self._dirtyparents:
@@ -1900,7 +1922,10 @@
def write(self, st, now):
parents = self.parents()
- st.write(self._rustmap.write(parents[0], parents[1], now))
+ packed = self._rustmap.write(
+ self._use_dirstate_v2, parents[0], parents[1], now
+ )
+ st.write(packed)
st.close()
self._dirtyparents = False
--- a/mercurial/interfaces/dirstate.py Wed May 19 13:15:00 2021 +0200
+++ b/mercurial/interfaces/dirstate.py Wed May 19 13:15:00 2021 +0200
@@ -6,7 +6,15 @@
class idirstate(interfaceutil.Interface):
- def __init__(opener, ui, root, validate, sparsematchfn, nodeconstants):
+ def __init__(
+ opener,
+ ui,
+ root,
+ validate,
+ sparsematchfn,
+ nodeconstants,
+ use_dirstate_v2,
+ ):
"""Create a new dirstate object.
opener is an open()-like callable that can be used to open the
--- a/mercurial/localrepo.py Wed May 19 13:15:00 2021 +0200
+++ b/mercurial/localrepo.py Wed May 19 13:15:00 2021 +0200
@@ -1690,6 +1690,8 @@
def _makedirstate(self):
"""Extension point for wrapping the dirstate per-repo."""
sparsematchfn = lambda: sparse.matcher(self)
+ v2_req = requirementsmod.DIRSTATE_V2_REQUIREMENT
+ use_dirstate_v2 = v2_req in self.requirements
return dirstate.dirstate(
self.vfs,
@@ -1698,6 +1700,7 @@
self._dirstatevalidate,
sparsematchfn,
self.nodeconstants,
+ use_dirstate_v2,
)
def _dirstatevalidate(self, node):
--- a/rust/hg-core/src/dirstate_tree.rs Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree.rs Wed May 19 13:15:00 2021 +0200
@@ -1,4 +1,5 @@
pub mod dirstate_map;
pub mod dispatch;
+pub mod on_disk;
pub mod path_with_basename;
mod status;
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Wed May 19 13:15:00 2021 +0200
@@ -4,14 +4,17 @@
use std::convert::TryInto;
use std::path::PathBuf;
+use super::on_disk::V2_FORMAT_MARKER;
use super::path_with_basename::WithBasename;
use crate::dirstate::parsers::clear_ambiguous_mtime;
use crate::dirstate::parsers::pack_entry;
use crate::dirstate::parsers::packed_entry_size;
use crate::dirstate::parsers::parse_dirstate_entries;
use crate::dirstate::parsers::Timestamp;
+use crate::errors::HgError;
use crate::matchers::Matcher;
use crate::utils::hg_path::{HgPath, HgPathBuf};
+use crate::utils::SliceExt;
use crate::CopyMapIter;
use crate::DirstateEntry;
use crate::DirstateError;
@@ -75,7 +78,24 @@
);
impl<'on_disk> DirstateMap<'on_disk> {
- pub fn new(
+ #[timed]
+ pub fn new_v2(
+ on_disk: &'on_disk [u8],
+ ) -> Result<(Self, Option<DirstateParents>), DirstateError> {
+ if let Some(rest) = on_disk.drop_prefix(V2_FORMAT_MARKER) {
+ Self::new_v1(rest)
+ } else if on_disk.is_empty() {
+ Self::new_v1(on_disk)
+ } else {
+ return Err(HgError::corrupted(
+ "missing dirstate-v2 magic number",
+ )
+ .into());
+ }
+ }
+
+ #[timed]
+ pub fn new_v1(
on_disk: &'on_disk [u8],
) -> Result<(Self, Option<DirstateParents>), DirstateError> {
let mut map = Self {
@@ -84,23 +104,16 @@
nodes_with_entry_count: 0,
nodes_with_copy_source_count: 0,
};
- let parents = map.read()?;
- Ok((map, parents))
- }
-
- /// Should only be called in `new`
- #[timed]
- fn read(&mut self) -> Result<Option<DirstateParents>, DirstateError> {
- if self.on_disk.is_empty() {
- return Ok(None);
+ if map.on_disk.is_empty() {
+ return Ok((map, None));
}
let parents = parse_dirstate_entries(
- self.on_disk,
+ map.on_disk,
|path, entry, copy_source| {
let tracked = entry.state.is_tracked();
let node = Self::get_or_insert_node(
- &mut self.root,
+ &mut map.root,
path,
WithBasename::to_cow_borrowed,
|ancestor| {
@@ -119,14 +132,15 @@
);
node.entry = Some(*entry);
node.copy_source = copy_source.map(Cow::Borrowed);
- self.nodes_with_entry_count += 1;
+ map.nodes_with_entry_count += 1;
if copy_source.is_some() {
- self.nodes_with_copy_source_count += 1
+ map.nodes_with_copy_source_count += 1
}
},
)?;
+ let parents = Some(parents.clone());
- Ok(Some(parents.clone()))
+ Ok((map, parents))
}
fn get_node(&self, path: &HgPath) -> Option<&Node> {
@@ -498,7 +512,8 @@
}
}
- fn pack(
+ #[timed]
+ fn pack_v1(
&mut self,
parents: DirstateParents,
now: Timestamp,
@@ -533,6 +548,18 @@
Ok(packed)
}
+ #[timed]
+ fn pack_v2(
+ &mut self,
+ parents: DirstateParents,
+ now: Timestamp,
+ ) -> Result<Vec<u8>, DirstateError> {
+ // Inefficient but temporary
+ let mut v2 = V2_FORMAT_MARKER.to_vec();
+ v2.append(&mut self.pack_v1(parents, now)?);
+ Ok(v2)
+ }
+
fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
// Do nothing, this `DirstateMap` does not a separate `all_dirs` that
// needs to be recomputed
--- a/rust/hg-core/src/dirstate_tree/dispatch.rs Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-core/src/dirstate_tree/dispatch.rs Wed May 19 13:15:00 2021 +0200
@@ -73,7 +73,13 @@
directory: &HgPath,
) -> Result<bool, DirstateMapError>;
- fn pack(
+ fn pack_v1(
+ &mut self,
+ parents: DirstateParents,
+ now: Timestamp,
+ ) -> Result<Vec<u8>, DirstateError>;
+
+ fn pack_v2(
&mut self,
parents: DirstateParents,
now: Timestamp,
@@ -211,7 +217,7 @@
self.has_dir(directory)
}
- fn pack(
+ fn pack_v1(
&mut self,
parents: DirstateParents,
now: Timestamp,
@@ -219,6 +225,16 @@
self.pack(parents, now)
}
+ fn pack_v2(
+ &mut self,
+ _parents: DirstateParents,
+ _now: Timestamp,
+ ) -> Result<Vec<u8>, DirstateError> {
+ panic!(
+ "should have used dirstate_tree::DirstateMap to use the v2 format"
+ )
+ }
+
fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
self.set_all_dirs()
}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/dirstate_tree/on_disk.rs Wed May 19 13:15:00 2021 +0200
@@ -0,0 +1,4 @@
+/// Added at the start of `.hg/dirstate` when the "v2" format is used.
+/// Acts like a "magic number". This is a sanity check, not strictly necessary
+/// since `.hg/requires` already governs which format should be used.
+pub const V2_FORMAT_MARKER: &[u8; 12] = b"dirstate-v2\n";
--- a/rust/hg-cpython/src/dirstate.rs Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate.rs Wed May 19 13:15:00 2021 +0200
@@ -26,6 +26,7 @@
exc, PyBytes, PyDict, PyErr, PyList, PyModule, PyObject, PyResult,
PySequence, Python,
};
+use hg::dirstate_tree::on_disk::V2_FORMAT_MARKER;
use hg::{utils::hg_path::HgPathBuf, DirstateEntry, EntryState, StateMap};
use libc::{c_char, c_int};
use std::convert::TryFrom;
@@ -117,6 +118,7 @@
)?;
m.add_class::<Dirs>(py)?;
m.add_class::<DirstateMap>(py)?;
+ m.add(py, "V2_FORMAT_MARKER", PyBytes::new(py, V2_FORMAT_MARKER))?;
m.add(
py,
"status",
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs Wed May 19 13:15:00 2021 +0200
@@ -55,13 +55,17 @@
/// Returns a `(dirstate_map, parents)` tuple
@staticmethod
- def new(use_dirstate_tree: bool, on_disk: PyBytes) -> PyResult<PyObject> {
- let dirstate_error = |_: DirstateError| {
- PyErr::new::<exc::OSError, _>(py, "Dirstate error".to_string())
+ def new(
+ use_dirstate_tree: bool,
+ use_dirstate_v2: bool,
+ on_disk: PyBytes,
+ ) -> PyResult<PyObject> {
+ let dirstate_error = |e: DirstateError| {
+ PyErr::new::<exc::OSError, _>(py, format!("Dirstate error: {:?}", e))
};
- let (inner, parents) = if use_dirstate_tree {
+ let (inner, parents) = if use_dirstate_tree || use_dirstate_v2 {
let (map, parents) =
- OwningDirstateMap::new(py, on_disk)
+ OwningDirstateMap::new(py, on_disk, use_dirstate_v2)
.map_err(dirstate_error)?;
(Box::new(map) as _, parents)
} else {
@@ -288,6 +292,7 @@
def write(
&self,
+ use_dirstate_v2: bool,
p1: PyObject,
p2: PyObject,
now: PyObject
@@ -298,7 +303,13 @@
p2: extract_node_id(py, &p2)?,
};
- match self.inner(py).borrow_mut().pack(parents, now) {
+ let mut inner = self.inner(py).borrow_mut();
+ let result = if use_dirstate_v2 {
+ inner.pack_v2(parents, now)
+ } else {
+ inner.pack_v1(parents, now)
+ };
+ match result {
Ok(packed) => Ok(PyBytes::new(py, &packed)),
Err(_) => Err(PyErr::new::<exc::OSError, _>(
py,
--- a/rust/hg-cpython/src/dirstate/dispatch.rs Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/dispatch.rs Wed May 19 13:15:00 2021 +0200
@@ -101,12 +101,20 @@
self.get_mut().has_dir(directory)
}
- fn pack(
+ fn pack_v1(
&mut self,
parents: DirstateParents,
now: Timestamp,
) -> Result<Vec<u8>, DirstateError> {
- self.get_mut().pack(parents, now)
+ self.get_mut().pack_v1(parents, now)
+ }
+
+ fn pack_v2(
+ &mut self,
+ parents: DirstateParents,
+ now: Timestamp,
+ ) -> Result<Vec<u8>, DirstateError> {
+ self.get_mut().pack_v2(parents, now)
}
fn set_all_dirs(&mut self) -> Result<(), DirstateMapError> {
--- a/rust/hg-cpython/src/dirstate/owning.rs Wed May 19 13:15:00 2021 +0200
+++ b/rust/hg-cpython/src/dirstate/owning.rs Wed May 19 13:15:00 2021 +0200
@@ -31,9 +31,14 @@
pub fn new(
py: Python,
on_disk: PyBytes,
+ use_dirstate_v2: bool,
) -> Result<(Self, Option<DirstateParents>), DirstateError> {
let bytes: &'_ [u8] = on_disk.data(py);
- let (map, parents) = DirstateMap::new(bytes)?;
+ let (map, parents) = if use_dirstate_v2 {
+ DirstateMap::new_v2(bytes)?
+ } else {
+ DirstateMap::new_v1(bytes)?
+ };
// Like in `bytes` above, this `'_` lifetime parameter borrows from
// the bytes buffer owned by `on_disk`.