rust-dirstate: fall back to v1 if reading v2 failed
This will help us not fail when a v1 dirstate is present on disk while a v2
was expected (which could happen with a racy/interrupted upgrade).
--- a/mercurial/dirstatemap.py Mon Jun 05 17:29:52 2023 +0200
+++ b/mercurial/dirstatemap.py Mon Jun 05 16:43:27 2023 +0200
@@ -4,6 +4,7 @@
# GNU General Public License version 2 or any later version.
+import struct
from .i18n import _
from . import (
@@ -151,9 +152,15 @@
b'dirstate only has a docket in v2 format'
)
self._set_identity()
- self._docket = docketmod.DirstateDocket.parse(
- self._readdirstatefile(), self._nodeconstants
- )
+ try:
+ self._docket = docketmod.DirstateDocket.parse(
+ self._readdirstatefile(), self._nodeconstants
+ )
+ except struct.error:
+ self._ui.debug(b"failed to read dirstate-v2 data")
+ raise error.CorruptedDirstate(
+ b"failed to read dirstate-v2 data"
+ )
return self._docket
def _read_v2_data(self):
@@ -176,11 +183,23 @@
return self._opener.read(self.docket.data_filename())
def write_v2_no_append(self, tr, st, meta, packed):
- old_docket = self.docket
+ try:
+ old_docket = self.docket
+ except error.CorruptedDirstate:
+ # This means we've identified a dirstate-v1 file on-disk when we
+ # were expecting a dirstate-v2 docket. We've managed to recover
+ # from that unexpected situation, and now we want to write back a
+ # dirstate-v2 file to make the on-disk situation right again.
+ #
+ # This shouldn't be triggered since `self.docket` is cached and
+ # we would have called parents() or read() first, but it's here
+ # just in case.
+ old_docket = None
+
new_docket = docketmod.DirstateDocket.with_new_uuid(
self.parents(), len(packed), meta
)
- if old_docket.uuid == new_docket.uuid:
+ if old_docket is not None and old_docket.uuid == new_docket.uuid:
raise error.ProgrammingError(b'dirstate docket name collision')
data_filename = new_docket.data_filename()
self._opener.write(data_filename, packed)
@@ -194,7 +213,7 @@
st.close()
# Remove the old data file after the new docket pointing to
# the new data file was written.
- if old_docket.uuid:
+ if old_docket is not None and old_docket.uuid:
data_filename = old_docket.data_filename()
if tr is not None:
tr.addbackup(data_filename, location=b'plain')
@@ -211,28 +230,40 @@
def parents(self):
if not self._parents:
if self._use_dirstate_v2:
- self._parents = self.docket.parents
+ try:
+ self.docket
+ except error.CorruptedDirstate as e:
+ # fall back to dirstate-v1 if we fail to read v2
+ self._v1_parents(e)
+ else:
+ self._parents = self.docket.parents
else:
- read_len = self._nodelen * 2
- st = self._readdirstatefile(read_len)
- l = len(st)
- if l == read_len:
- self._parents = (
- st[: self._nodelen],
- st[self._nodelen : 2 * self._nodelen],
- )
- elif l == 0:
- self._parents = (
- self._nodeconstants.nullid,
- self._nodeconstants.nullid,
- )
- else:
- raise error.Abort(
- _(b'working directory state appears damaged!')
- )
+ self._v1_parents()
return self._parents
+ def _v1_parents(self, from_v2_exception=None):
+ read_len = self._nodelen * 2
+ st = self._readdirstatefile(read_len)
+ l = len(st)
+ if l == read_len:
+ self._parents = (
+ st[: self._nodelen],
+ st[self._nodelen : 2 * self._nodelen],
+ )
+ elif l == 0:
+ self._parents = (
+ self._nodeconstants.nullid,
+ self._nodeconstants.nullid,
+ )
+ else:
+ hint = None
+ if from_v2_exception is not None:
+ hint = _(b"falling back to dirstate-v1 from v2 also failed")
+ raise error.Abort(
+ _(b'working directory state appears damaged!'), hint
+ )
+
class dirstatemap(_dirstatemapcommon):
"""Map encapsulating the dirstate's contents.
@@ -330,11 +361,17 @@
def read(self):
testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
if self._use_dirstate_v2:
-
- if not self.docket.uuid:
- return
- testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
- st = self._read_v2_data()
+ try:
+ self.docket
+ except error.CorruptedDirstate:
+ # fall back to dirstate-v1 if we fail to read v2
+ self._set_identity()
+ st = self._readdirstatefile()
+ else:
+ if not self.docket.uuid:
+ return
+ testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
+ st = self._read_v2_data()
else:
self._set_identity()
st = self._readdirstatefile()
@@ -365,10 +402,17 @@
#
# (we cannot decorate the function directly since it is in a C module)
if self._use_dirstate_v2:
- p = self.docket.parents
- meta = self.docket.tree_metadata
- parse_dirstate = util.nogc(v2.parse_dirstate)
- parse_dirstate(self._map, self.copymap, st, meta)
+ try:
+ self.docket
+ except error.CorruptedDirstate:
+ # fall back to dirstate-v1 if we fail to parse v2
+ parse_dirstate = util.nogc(parsers.parse_dirstate)
+ p = parse_dirstate(self._map, self.copymap, st)
+ else:
+ p = self.docket.parents
+ meta = self.docket.tree_metadata
+ parse_dirstate = util.nogc(v2.parse_dirstate)
+ parse_dirstate(self._map, self.copymap, st, meta)
else:
parse_dirstate = util.nogc(parsers.parse_dirstate)
p = parse_dirstate(self._map, self.copymap, st)
@@ -597,38 +641,37 @@
testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
if self._use_dirstate_v2:
- self.docket # load the data if needed
- inode = (
- self.identity.stat.st_ino
- if self.identity is not None
- and self.identity.stat is not None
- else None
- )
- testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
- if not self.docket.uuid:
- data = b''
- self._map = rustmod.DirstateMap.new_empty()
+ try:
+ self.docket
+ except error.CorruptedDirstate as e:
+ # fall back to dirstate-v1 if we fail to read v2
+ parents = self._v1_map(e)
else:
- data = self._read_v2_data()
- self._map = rustmod.DirstateMap.new_v2(
- data,
- self.docket.data_size,
- self.docket.tree_metadata,
- self.docket.uuid,
- inode,
+ parents = self.docket.parents
+ inode = (
+ self.identity.stat.st_ino
+ if self.identity is not None
+ and self.identity.stat is not None
+ else None
+ )
+ testing.wait_on_cfg(
+ self._ui, b'dirstate.post-docket-read-file'
)
- parents = self.docket.parents
+ if not self.docket.uuid:
+ data = b''
+ self._map = rustmod.DirstateMap.new_empty()
+ else:
+ data = self._read_v2_data()
+ self._map = rustmod.DirstateMap.new_v2(
+ data,
+ self.docket.data_size,
+ self.docket.tree_metadata,
+ self.docket.uuid,
+ inode,
+ )
+ parents = self.docket.parents
else:
- self._set_identity()
- inode = (
- self.identity.stat.st_ino
- if self.identity is not None
- and self.identity.stat is not None
- else None
- )
- self._map, parents = rustmod.DirstateMap.new_v1(
- self._readdirstatefile(), inode
- )
+ parents = self._v1_map()
if parents and not self._dirtyparents:
self.setparents(*parents)
@@ -638,6 +681,23 @@
self.get = self._map.get
return self._map
+ def _v1_map(self, from_v2_exception=None):
+ self._set_identity()
+ inode = (
+ self.identity.stat.st_ino
+ if self.identity is not None and self.identity.stat is not None
+ else None
+ )
+ try:
+ self._map, parents = rustmod.DirstateMap.new_v1(
+ self._readdirstatefile(), inode
+ )
+ except OSError as e:
+ if from_v2_exception is not None:
+ raise e from from_v2_exception
+ raise
+ return parents
+
@property
def copymap(self):
return self._map.copymap()
@@ -696,9 +756,15 @@
self._dirtyparents = False
return
+ write_mode = self._write_mode
+ try:
+ docket = self.docket
+ except error.CorruptedDirstate:
+ # fall back to dirstate-v1 if we fail to parse v2
+ docket = None
+
# We can only append to an existing data file if there is one
- write_mode = self._write_mode
- if self.docket.uuid is None:
+ if docket is None or docket.uuid is None:
write_mode = WRITE_MODE_FORCE_NEW
packed, meta, append = self._map.write_v2(write_mode)
if append:
--- a/mercurial/error.py Mon Jun 05 17:29:52 2023 +0200
+++ b/mercurial/error.py Mon Jun 05 16:43:27 2023 +0200
@@ -650,6 +650,13 @@
__bytes__ = _tobytes
+class CorruptedDirstate(Exception):
+ """error raised the dirstate appears corrupted on-disk. It may be due to
+ a dirstate version mismatch (i.e. expecting v2 and finding v1 on disk)."""
+
+ __bytes__ = _tobytes
+
+
class PeerTransportError(Abort):
"""Transport-level I/O error when communicating with a peer repo."""
--- a/rust/hg-core/src/repo.rs Mon Jun 05 17:29:52 2023 +0200
+++ b/rust/hg-core/src/repo.rs Mon Jun 05 16:43:27 2023 +0200
@@ -238,9 +238,10 @@
/// a dirstate-v2 file will indeed be found, but in rare cases (like the
/// upgrade mechanism being cut short), the on-disk version will be a
/// v1 file.
- /// Semantically, having a requirement only means that a client should be
- /// able to understand the repo *if* it uses the requirement, but not that
- /// the requirement is actually used.
+ /// Semantically, having a requirement only means that a client cannot
+ /// properly understand or properly update the repo if it lacks the support
+ /// for the required feature, but not that that feature is actually used
+ /// in all occasions.
pub fn use_dirstate_v2(&self) -> bool {
self.requirements
.contains(requirements::DIRSTATE_V2_REQUIREMENT)
@@ -287,9 +288,20 @@
let parents = if dirstate.is_empty() {
DirstateParents::NULL
} else if self.use_dirstate_v2() {
- let docket =
- crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
- docket.parents()
+ let docket_res =
+ crate::dirstate_tree::on_disk::read_docket(&dirstate);
+ match docket_res {
+ Ok(docket) => docket.parents(),
+ Err(_) => {
+ log::info!(
+ "Parsing dirstate docket failed, \
+ falling back to dirstate-v1"
+ );
+ *crate::dirstate::parsers::parse_dirstate_parents(
+ &dirstate,
+ )?
+ }
+ }
} else {
*crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
};
@@ -317,10 +329,30 @@
self.dirstate_parents.set(DirstateParents::NULL);
Ok((identity, None, 0))
} else {
- let docket =
- crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
- self.dirstate_parents.set(docket.parents());
- Ok((identity, Some(docket.uuid.to_owned()), docket.data_size()))
+ let docket_res =
+ crate::dirstate_tree::on_disk::read_docket(&dirstate);
+ match docket_res {
+ Ok(docket) => {
+ self.dirstate_parents.set(docket.parents());
+ Ok((
+ identity,
+ Some(docket.uuid.to_owned()),
+ docket.data_size(),
+ ))
+ }
+ Err(_) => {
+ log::info!(
+ "Parsing dirstate docket failed, \
+ falling back to dirstate-v1"
+ );
+ let parents =
+ *crate::dirstate::parsers::parse_dirstate_parents(
+ &dirstate,
+ )?;
+ self.dirstate_parents.set(parents);
+ Ok((identity, None, 0))
+ }
+ }
}
}
@@ -352,7 +384,13 @@
);
continue;
}
- _ => return Err(e),
+ _ => {
+ log::info!(
+ "Reading dirstate v2 failed, \
+ falling back to v1"
+ );
+ return self.new_dirstate_map_v1();
+ }
},
}
}
@@ -363,23 +401,22 @@
);
Err(DirstateError::Common(error))
} else {
- debug_wait_for_file_or_print(
- self.config(),
- "dirstate.pre-read-file",
- );
- let identity = self.dirstate_identity()?;
- let dirstate_file_contents = self.dirstate_file_contents()?;
- if dirstate_file_contents.is_empty() {
- self.dirstate_parents.set(DirstateParents::NULL);
- Ok(OwningDirstateMap::new_empty(Vec::new()))
- } else {
- let (map, parents) = OwningDirstateMap::new_v1(
- dirstate_file_contents,
- identity,
- )?;
- self.dirstate_parents.set(parents);
- Ok(map)
- }
+ self.new_dirstate_map_v1()
+ }
+ }
+
+ fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
+ debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
+ let identity = self.dirstate_identity()?;
+ let dirstate_file_contents = self.dirstate_file_contents()?;
+ if dirstate_file_contents.is_empty() {
+ self.dirstate_parents.set(DirstateParents::NULL);
+ Ok(OwningDirstateMap::new_empty(Vec::new()))
+ } else {
+ let (map, parents) =
+ OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
+ self.dirstate_parents.set(parents);
+ Ok(map)
}
}
--- a/tests/test-dirstate-version-fallback.t Mon Jun 05 17:29:52 2023 +0200
+++ b/tests/test-dirstate-version-fallback.t Mon Jun 05 16:43:27 2023 +0200
@@ -20,7 +20,7 @@
Upgrade it to v2
- $ hg debugupgraderepo -q --config format.use-dirstate-v2=1 --run | grep added
+ $ hg debugupgraderepo -q --config format.use-dirstate-v2=1 --run | egrep 'added:|removed:'
added: dirstate-v2
$ hg debugrequires | grep dirstate
dirstate-v2
@@ -35,9 +35,15 @@
There should be no errors, but a v2 dirstate should be written back to disk
$ hg st
- abort: dirstate-v2 parse error: when reading docket, Expected at least * bytes, got * (glob) (known-bad-output !)
- [255]
$ ls -1 .hg/dirstate*
.hg/dirstate
- .hg/dirstate.* (glob) (missing-correct-output !)
+ .hg/dirstate.* (glob)
+
+Corrupt the dirstate to see how the errors show up to the user
+ $ echo "I ate your data" > .hg/dirstate
+ $ hg st
+ abort: working directory state appears damaged! (no-rhg !)
+ (falling back to dirstate-v1 from v2 also failed) (no-rhg !)
+ abort: Too little data for dirstate. (rhg !)
+ [255]