Mercurial > hg
changeset 50660:bf16ef96defe stable
rust-dirstate: fall back to v1 if reading v2 failed
This will help us not fail when a v1 dirstate is present on disk while a v2
was expected (which could happen with a racy/interrupted upgrade).
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Mon, 05 Jun 2023 16:43:27 +0200 |
parents | 9e08cfbe77b1 |
children | 978ffa09910b |
files | mercurial/dirstatemap.py mercurial/error.py rust/hg-core/src/repo.rs tests/test-dirstate-version-fallback.t |
diffstat | 4 files changed, 212 insertions(+), 96 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/dirstatemap.py Mon Jun 05 17:29:52 2023 +0200 +++ b/mercurial/dirstatemap.py Mon Jun 05 16:43:27 2023 +0200 @@ -4,6 +4,7 @@ # GNU General Public License version 2 or any later version. +import struct from .i18n import _ from . import ( @@ -151,9 +152,15 @@ b'dirstate only has a docket in v2 format' ) self._set_identity() - self._docket = docketmod.DirstateDocket.parse( - self._readdirstatefile(), self._nodeconstants - ) + try: + self._docket = docketmod.DirstateDocket.parse( + self._readdirstatefile(), self._nodeconstants + ) + except struct.error: + self._ui.debug(b"failed to read dirstate-v2 data") + raise error.CorruptedDirstate( + b"failed to read dirstate-v2 data" + ) return self._docket def _read_v2_data(self): @@ -176,11 +183,23 @@ return self._opener.read(self.docket.data_filename()) def write_v2_no_append(self, tr, st, meta, packed): - old_docket = self.docket + try: + old_docket = self.docket + except error.CorruptedDirstate: + # This means we've identified a dirstate-v1 file on-disk when we + # were expecting a dirstate-v2 docket. We've managed to recover + # from that unexpected situation, and now we want to write back a + # dirstate-v2 file to make the on-disk situation right again. + # + # This shouldn't be triggered since `self.docket` is cached and + # we would have called parents() or read() first, but it's here + # just in case. + old_docket = None + new_docket = docketmod.DirstateDocket.with_new_uuid( self.parents(), len(packed), meta ) - if old_docket.uuid == new_docket.uuid: + if old_docket is not None and old_docket.uuid == new_docket.uuid: raise error.ProgrammingError(b'dirstate docket name collision') data_filename = new_docket.data_filename() self._opener.write(data_filename, packed) @@ -194,7 +213,7 @@ st.close() # Remove the old data file after the new docket pointing to # the new data file was written. - if old_docket.uuid: + if old_docket is not None and old_docket.uuid: data_filename = old_docket.data_filename() if tr is not None: tr.addbackup(data_filename, location=b'plain') @@ -211,28 +230,40 @@ def parents(self): if not self._parents: if self._use_dirstate_v2: - self._parents = self.docket.parents + try: + self.docket + except error.CorruptedDirstate as e: + # fall back to dirstate-v1 if we fail to read v2 + self._v1_parents(e) + else: + self._parents = self.docket.parents else: - read_len = self._nodelen * 2 - st = self._readdirstatefile(read_len) - l = len(st) - if l == read_len: - self._parents = ( - st[: self._nodelen], - st[self._nodelen : 2 * self._nodelen], - ) - elif l == 0: - self._parents = ( - self._nodeconstants.nullid, - self._nodeconstants.nullid, - ) - else: - raise error.Abort( - _(b'working directory state appears damaged!') - ) + self._v1_parents() return self._parents + def _v1_parents(self, from_v2_exception=None): + read_len = self._nodelen * 2 + st = self._readdirstatefile(read_len) + l = len(st) + if l == read_len: + self._parents = ( + st[: self._nodelen], + st[self._nodelen : 2 * self._nodelen], + ) + elif l == 0: + self._parents = ( + self._nodeconstants.nullid, + self._nodeconstants.nullid, + ) + else: + hint = None + if from_v2_exception is not None: + hint = _(b"falling back to dirstate-v1 from v2 also failed") + raise error.Abort( + _(b'working directory state appears damaged!'), hint + ) + class dirstatemap(_dirstatemapcommon): """Map encapsulating the dirstate's contents. @@ -330,11 +361,17 @@ def read(self): testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file') if self._use_dirstate_v2: - - if not self.docket.uuid: - return - testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file') - st = self._read_v2_data() + try: + self.docket + except error.CorruptedDirstate: + # fall back to dirstate-v1 if we fail to read v2 + self._set_identity() + st = self._readdirstatefile() + else: + if not self.docket.uuid: + return + testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file') + st = self._read_v2_data() else: self._set_identity() st = self._readdirstatefile() @@ -365,10 +402,17 @@ # # (we cannot decorate the function directly since it is in a C module) if self._use_dirstate_v2: - p = self.docket.parents - meta = self.docket.tree_metadata - parse_dirstate = util.nogc(v2.parse_dirstate) - parse_dirstate(self._map, self.copymap, st, meta) + try: + self.docket + except error.CorruptedDirstate: + # fall back to dirstate-v1 if we fail to parse v2 + parse_dirstate = util.nogc(parsers.parse_dirstate) + p = parse_dirstate(self._map, self.copymap, st) + else: + p = self.docket.parents + meta = self.docket.tree_metadata + parse_dirstate = util.nogc(v2.parse_dirstate) + parse_dirstate(self._map, self.copymap, st, meta) else: parse_dirstate = util.nogc(parsers.parse_dirstate) p = parse_dirstate(self._map, self.copymap, st) @@ -597,38 +641,37 @@ testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file') if self._use_dirstate_v2: - self.docket # load the data if needed - inode = ( - self.identity.stat.st_ino - if self.identity is not None - and self.identity.stat is not None - else None - ) - testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file') - if not self.docket.uuid: - data = b'' - self._map = rustmod.DirstateMap.new_empty() + try: + self.docket + except error.CorruptedDirstate as e: + # fall back to dirstate-v1 if we fail to read v2 + parents = self._v1_map(e) else: - data = self._read_v2_data() - self._map = rustmod.DirstateMap.new_v2( - data, - self.docket.data_size, - self.docket.tree_metadata, - self.docket.uuid, - inode, + parents = self.docket.parents + inode = ( + self.identity.stat.st_ino + if self.identity is not None + and self.identity.stat is not None + else None + ) + testing.wait_on_cfg( + self._ui, b'dirstate.post-docket-read-file' ) - parents = self.docket.parents + if not self.docket.uuid: + data = b'' + self._map = rustmod.DirstateMap.new_empty() + else: + data = self._read_v2_data() + self._map = rustmod.DirstateMap.new_v2( + data, + self.docket.data_size, + self.docket.tree_metadata, + self.docket.uuid, + inode, + ) + parents = self.docket.parents else: - self._set_identity() - inode = ( - self.identity.stat.st_ino - if self.identity is not None - and self.identity.stat is not None - else None - ) - self._map, parents = rustmod.DirstateMap.new_v1( - self._readdirstatefile(), inode - ) + parents = self._v1_map() if parents and not self._dirtyparents: self.setparents(*parents) @@ -638,6 +681,23 @@ self.get = self._map.get return self._map + def _v1_map(self, from_v2_exception=None): + self._set_identity() + inode = ( + self.identity.stat.st_ino + if self.identity is not None and self.identity.stat is not None + else None + ) + try: + self._map, parents = rustmod.DirstateMap.new_v1( + self._readdirstatefile(), inode + ) + except OSError as e: + if from_v2_exception is not None: + raise e from from_v2_exception + raise + return parents + @property def copymap(self): return self._map.copymap() @@ -696,9 +756,15 @@ self._dirtyparents = False return + write_mode = self._write_mode + try: + docket = self.docket + except error.CorruptedDirstate: + # fall back to dirstate-v1 if we fail to parse v2 + docket = None + # We can only append to an existing data file if there is one - write_mode = self._write_mode - if self.docket.uuid is None: + if docket is None or docket.uuid is None: write_mode = WRITE_MODE_FORCE_NEW packed, meta, append = self._map.write_v2(write_mode) if append:
--- a/mercurial/error.py Mon Jun 05 17:29:52 2023 +0200 +++ b/mercurial/error.py Mon Jun 05 16:43:27 2023 +0200 @@ -650,6 +650,13 @@ __bytes__ = _tobytes +class CorruptedDirstate(Exception): + """error raised the dirstate appears corrupted on-disk. It may be due to + a dirstate version mismatch (i.e. expecting v2 and finding v1 on disk).""" + + __bytes__ = _tobytes + + class PeerTransportError(Abort): """Transport-level I/O error when communicating with a peer repo."""
--- a/rust/hg-core/src/repo.rs Mon Jun 05 17:29:52 2023 +0200 +++ b/rust/hg-core/src/repo.rs Mon Jun 05 16:43:27 2023 +0200 @@ -238,9 +238,10 @@ /// a dirstate-v2 file will indeed be found, but in rare cases (like the /// upgrade mechanism being cut short), the on-disk version will be a /// v1 file. - /// Semantically, having a requirement only means that a client should be - /// able to understand the repo *if* it uses the requirement, but not that - /// the requirement is actually used. + /// Semantically, having a requirement only means that a client cannot + /// properly understand or properly update the repo if it lacks the support + /// for the required feature, but not that that feature is actually used + /// in all occasions. pub fn use_dirstate_v2(&self) -> bool { self.requirements .contains(requirements::DIRSTATE_V2_REQUIREMENT) @@ -287,9 +288,20 @@ let parents = if dirstate.is_empty() { DirstateParents::NULL } else if self.use_dirstate_v2() { - let docket = - crate::dirstate_tree::on_disk::read_docket(&dirstate)?; - docket.parents() + let docket_res = + crate::dirstate_tree::on_disk::read_docket(&dirstate); + match docket_res { + Ok(docket) => docket.parents(), + Err(_) => { + log::info!( + "Parsing dirstate docket failed, \ + falling back to dirstate-v1" + ); + *crate::dirstate::parsers::parse_dirstate_parents( + &dirstate, + )? + } + } } else { *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)? }; @@ -317,10 +329,30 @@ self.dirstate_parents.set(DirstateParents::NULL); Ok((identity, None, 0)) } else { - let docket = - crate::dirstate_tree::on_disk::read_docket(&dirstate)?; - self.dirstate_parents.set(docket.parents()); - Ok((identity, Some(docket.uuid.to_owned()), docket.data_size())) + let docket_res = + crate::dirstate_tree::on_disk::read_docket(&dirstate); + match docket_res { + Ok(docket) => { + self.dirstate_parents.set(docket.parents()); + Ok(( + identity, + Some(docket.uuid.to_owned()), + docket.data_size(), + )) + } + Err(_) => { + log::info!( + "Parsing dirstate docket failed, \ + falling back to dirstate-v1" + ); + let parents = + *crate::dirstate::parsers::parse_dirstate_parents( + &dirstate, + )?; + self.dirstate_parents.set(parents); + Ok((identity, None, 0)) + } + } } } @@ -352,7 +384,13 @@ ); continue; } - _ => return Err(e), + _ => { + log::info!( + "Reading dirstate v2 failed, \ + falling back to v1" + ); + return self.new_dirstate_map_v1(); + } }, } } @@ -363,23 +401,22 @@ ); Err(DirstateError::Common(error)) } else { - debug_wait_for_file_or_print( - self.config(), - "dirstate.pre-read-file", - ); - let identity = self.dirstate_identity()?; - let dirstate_file_contents = self.dirstate_file_contents()?; - if dirstate_file_contents.is_empty() { - self.dirstate_parents.set(DirstateParents::NULL); - Ok(OwningDirstateMap::new_empty(Vec::new())) - } else { - let (map, parents) = OwningDirstateMap::new_v1( - dirstate_file_contents, - identity, - )?; - self.dirstate_parents.set(parents); - Ok(map) - } + self.new_dirstate_map_v1() + } + } + + fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> { + debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file"); + let identity = self.dirstate_identity()?; + let dirstate_file_contents = self.dirstate_file_contents()?; + if dirstate_file_contents.is_empty() { + self.dirstate_parents.set(DirstateParents::NULL); + Ok(OwningDirstateMap::new_empty(Vec::new())) + } else { + let (map, parents) = + OwningDirstateMap::new_v1(dirstate_file_contents, identity)?; + self.dirstate_parents.set(parents); + Ok(map) } }
--- a/tests/test-dirstate-version-fallback.t Mon Jun 05 17:29:52 2023 +0200 +++ b/tests/test-dirstate-version-fallback.t Mon Jun 05 16:43:27 2023 +0200 @@ -20,7 +20,7 @@ Upgrade it to v2 - $ hg debugupgraderepo -q --config format.use-dirstate-v2=1 --run | grep added + $ hg debugupgraderepo -q --config format.use-dirstate-v2=1 --run | egrep 'added:|removed:' added: dirstate-v2 $ hg debugrequires | grep dirstate dirstate-v2 @@ -35,9 +35,15 @@ There should be no errors, but a v2 dirstate should be written back to disk $ hg st - abort: dirstate-v2 parse error: when reading docket, Expected at least * bytes, got * (glob) (known-bad-output !) - [255] $ ls -1 .hg/dirstate* .hg/dirstate - .hg/dirstate.* (glob) (missing-correct-output !) + .hg/dirstate.* (glob) + +Corrupt the dirstate to see how the errors show up to the user + $ echo "I ate your data" > .hg/dirstate + $ hg st + abort: working directory state appears damaged! (no-rhg !) + (falling back to dirstate-v1 from v2 also failed) (no-rhg !) + abort: Too little data for dirstate. (rhg !) + [255]