changeset 50389:bf16ef96defe stable

rust-dirstate: fall back to v1 if reading v2 failed This will help us not fail when a v1 dirstate is present on disk while a v2 was expected (which could happen with a racy/interrupted upgrade).
author Raphaël Gomès <rgomes@octobus.net>
date Mon, 05 Jun 2023 16:43:27 +0200
parents 9e08cfbe77b1
children 978ffa09910b
files mercurial/dirstatemap.py mercurial/error.py rust/hg-core/src/repo.rs tests/test-dirstate-version-fallback.t
diffstat 4 files changed, 212 insertions(+), 96 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/dirstatemap.py	Mon Jun 05 17:29:52 2023 +0200
+++ b/mercurial/dirstatemap.py	Mon Jun 05 16:43:27 2023 +0200
@@ -4,6 +4,7 @@
 # GNU General Public License version 2 or any later version.
 
 
+import struct
 from .i18n import _
 
 from . import (
@@ -151,9 +152,15 @@
                     b'dirstate only has a docket in v2 format'
                 )
             self._set_identity()
-            self._docket = docketmod.DirstateDocket.parse(
-                self._readdirstatefile(), self._nodeconstants
-            )
+            try:
+                self._docket = docketmod.DirstateDocket.parse(
+                    self._readdirstatefile(), self._nodeconstants
+                )
+            except struct.error:
+                self._ui.debug(b"failed to read dirstate-v2 data")
+                raise error.CorruptedDirstate(
+                    b"failed to read dirstate-v2 data"
+                )
         return self._docket
 
     def _read_v2_data(self):
@@ -176,11 +183,23 @@
         return self._opener.read(self.docket.data_filename())
 
     def write_v2_no_append(self, tr, st, meta, packed):
-        old_docket = self.docket
+        try:
+            old_docket = self.docket
+        except error.CorruptedDirstate:
+            # This means we've identified a dirstate-v1 file on-disk when we
+            # were expecting a dirstate-v2 docket. We've managed to recover
+            # from that unexpected situation, and now we want to write back a
+            # dirstate-v2 file to make the on-disk situation right again.
+            #
+            # This shouldn't be triggered since `self.docket` is cached and
+            # we would have called parents() or read() first, but it's here
+            # just in case.
+            old_docket = None
+
         new_docket = docketmod.DirstateDocket.with_new_uuid(
             self.parents(), len(packed), meta
         )
-        if old_docket.uuid == new_docket.uuid:
+        if old_docket is not None and old_docket.uuid == new_docket.uuid:
             raise error.ProgrammingError(b'dirstate docket name collision')
         data_filename = new_docket.data_filename()
         self._opener.write(data_filename, packed)
@@ -194,7 +213,7 @@
         st.close()
         # Remove the old data file after the new docket pointing to
         # the new data file was written.
-        if old_docket.uuid:
+        if old_docket is not None and old_docket.uuid:
             data_filename = old_docket.data_filename()
             if tr is not None:
                 tr.addbackup(data_filename, location=b'plain')
@@ -211,28 +230,40 @@
     def parents(self):
         if not self._parents:
             if self._use_dirstate_v2:
-                self._parents = self.docket.parents
+                try:
+                    self.docket
+                except error.CorruptedDirstate as e:
+                    # fall back to dirstate-v1 if we fail to read v2
+                    self._v1_parents(e)
+                else:
+                    self._parents = self.docket.parents
             else:
-                read_len = self._nodelen * 2
-                st = self._readdirstatefile(read_len)
-                l = len(st)
-                if l == read_len:
-                    self._parents = (
-                        st[: self._nodelen],
-                        st[self._nodelen : 2 * self._nodelen],
-                    )
-                elif l == 0:
-                    self._parents = (
-                        self._nodeconstants.nullid,
-                        self._nodeconstants.nullid,
-                    )
-                else:
-                    raise error.Abort(
-                        _(b'working directory state appears damaged!')
-                    )
+                self._v1_parents()
 
         return self._parents
 
+    def _v1_parents(self, from_v2_exception=None):
+        read_len = self._nodelen * 2
+        st = self._readdirstatefile(read_len)
+        l = len(st)
+        if l == read_len:
+            self._parents = (
+                st[: self._nodelen],
+                st[self._nodelen : 2 * self._nodelen],
+            )
+        elif l == 0:
+            self._parents = (
+                self._nodeconstants.nullid,
+                self._nodeconstants.nullid,
+            )
+        else:
+            hint = None
+            if from_v2_exception is not None:
+                hint = _(b"falling back to dirstate-v1 from v2 also failed")
+            raise error.Abort(
+                _(b'working directory state appears damaged!'), hint
+            )
+
 
 class dirstatemap(_dirstatemapcommon):
     """Map encapsulating the dirstate's contents.
@@ -330,11 +361,17 @@
     def read(self):
         testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
         if self._use_dirstate_v2:
-
-            if not self.docket.uuid:
-                return
-            testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
-            st = self._read_v2_data()
+            try:
+                self.docket
+            except error.CorruptedDirstate:
+                # fall back to dirstate-v1 if we fail to read v2
+                self._set_identity()
+                st = self._readdirstatefile()
+            else:
+                if not self.docket.uuid:
+                    return
+                testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
+                st = self._read_v2_data()
         else:
             self._set_identity()
             st = self._readdirstatefile()
@@ -365,10 +402,17 @@
         #
         # (we cannot decorate the function directly since it is in a C module)
         if self._use_dirstate_v2:
-            p = self.docket.parents
-            meta = self.docket.tree_metadata
-            parse_dirstate = util.nogc(v2.parse_dirstate)
-            parse_dirstate(self._map, self.copymap, st, meta)
+            try:
+                self.docket
+            except error.CorruptedDirstate:
+                # fall back to dirstate-v1 if we fail to parse v2
+                parse_dirstate = util.nogc(parsers.parse_dirstate)
+                p = parse_dirstate(self._map, self.copymap, st)
+            else:
+                p = self.docket.parents
+                meta = self.docket.tree_metadata
+                parse_dirstate = util.nogc(v2.parse_dirstate)
+                parse_dirstate(self._map, self.copymap, st, meta)
         else:
             parse_dirstate = util.nogc(parsers.parse_dirstate)
             p = parse_dirstate(self._map, self.copymap, st)
@@ -597,38 +641,37 @@
 
             testing.wait_on_cfg(self._ui, b'dirstate.pre-read-file')
             if self._use_dirstate_v2:
-                self.docket  # load the data if needed
-                inode = (
-                    self.identity.stat.st_ino
-                    if self.identity is not None
-                    and self.identity.stat is not None
-                    else None
-                )
-                testing.wait_on_cfg(self._ui, b'dirstate.post-docket-read-file')
-                if not self.docket.uuid:
-                    data = b''
-                    self._map = rustmod.DirstateMap.new_empty()
+                try:
+                    self.docket
+                except error.CorruptedDirstate as e:
+                    # fall back to dirstate-v1 if we fail to read v2
+                    parents = self._v1_map(e)
                 else:
-                    data = self._read_v2_data()
-                    self._map = rustmod.DirstateMap.new_v2(
-                        data,
-                        self.docket.data_size,
-                        self.docket.tree_metadata,
-                        self.docket.uuid,
-                        inode,
+                    parents = self.docket.parents
+                    inode = (
+                        self.identity.stat.st_ino
+                        if self.identity is not None
+                        and self.identity.stat is not None
+                        else None
+                    )
+                    testing.wait_on_cfg(
+                        self._ui, b'dirstate.post-docket-read-file'
                     )
-                parents = self.docket.parents
+                    if not self.docket.uuid:
+                        data = b''
+                        self._map = rustmod.DirstateMap.new_empty()
+                    else:
+                        data = self._read_v2_data()
+                        self._map = rustmod.DirstateMap.new_v2(
+                            data,
+                            self.docket.data_size,
+                            self.docket.tree_metadata,
+                            self.docket.uuid,
+                            inode,
+                        )
+                    parents = self.docket.parents
             else:
-                self._set_identity()
-                inode = (
-                    self.identity.stat.st_ino
-                    if self.identity is not None
-                    and self.identity.stat is not None
-                    else None
-                )
-                self._map, parents = rustmod.DirstateMap.new_v1(
-                    self._readdirstatefile(), inode
-                )
+                parents = self._v1_map()
 
             if parents and not self._dirtyparents:
                 self.setparents(*parents)
@@ -638,6 +681,23 @@
             self.get = self._map.get
             return self._map
 
+        def _v1_map(self, from_v2_exception=None):
+            self._set_identity()
+            inode = (
+                self.identity.stat.st_ino
+                if self.identity is not None and self.identity.stat is not None
+                else None
+            )
+            try:
+                self._map, parents = rustmod.DirstateMap.new_v1(
+                    self._readdirstatefile(), inode
+                )
+            except OSError as e:
+                if from_v2_exception is not None:
+                    raise e from from_v2_exception
+                raise
+            return parents
+
         @property
         def copymap(self):
             return self._map.copymap()
@@ -696,9 +756,15 @@
                 self._dirtyparents = False
                 return
 
+            write_mode = self._write_mode
+            try:
+                docket = self.docket
+            except error.CorruptedDirstate:
+                # fall back to dirstate-v1 if we fail to parse v2
+                docket = None
+
             # We can only append to an existing data file if there is one
-            write_mode = self._write_mode
-            if self.docket.uuid is None:
+            if docket is None or docket.uuid is None:
                 write_mode = WRITE_MODE_FORCE_NEW
             packed, meta, append = self._map.write_v2(write_mode)
             if append:
--- a/mercurial/error.py	Mon Jun 05 17:29:52 2023 +0200
+++ b/mercurial/error.py	Mon Jun 05 16:43:27 2023 +0200
@@ -650,6 +650,13 @@
     __bytes__ = _tobytes
 
 
+class CorruptedDirstate(Exception):
+    """error raised the dirstate appears corrupted on-disk. It may be due to
+    a dirstate version mismatch (i.e. expecting v2 and finding v1 on disk)."""
+
+    __bytes__ = _tobytes
+
+
 class PeerTransportError(Abort):
     """Transport-level I/O error when communicating with a peer repo."""
 
--- a/rust/hg-core/src/repo.rs	Mon Jun 05 17:29:52 2023 +0200
+++ b/rust/hg-core/src/repo.rs	Mon Jun 05 16:43:27 2023 +0200
@@ -238,9 +238,10 @@
     /// a dirstate-v2 file will indeed be found, but in rare cases (like the
     /// upgrade mechanism being cut short), the on-disk version will be a
     /// v1 file.
-    /// Semantically, having a requirement only means that a client should be
-    /// able to understand the repo *if* it uses the requirement, but not that
-    /// the requirement is actually used.
+    /// Semantically, having a requirement only means that a client cannot
+    /// properly understand or properly update the repo if it lacks the support
+    /// for the required feature, but not that that feature is actually used
+    /// in all occasions.
     pub fn use_dirstate_v2(&self) -> bool {
         self.requirements
             .contains(requirements::DIRSTATE_V2_REQUIREMENT)
@@ -287,9 +288,20 @@
         let parents = if dirstate.is_empty() {
             DirstateParents::NULL
         } else if self.use_dirstate_v2() {
-            let docket =
-                crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
-            docket.parents()
+            let docket_res =
+                crate::dirstate_tree::on_disk::read_docket(&dirstate);
+            match docket_res {
+                Ok(docket) => docket.parents(),
+                Err(_) => {
+                    log::info!(
+                        "Parsing dirstate docket failed, \
+                        falling back to dirstate-v1"
+                    );
+                    *crate::dirstate::parsers::parse_dirstate_parents(
+                        &dirstate,
+                    )?
+                }
+            }
         } else {
             *crate::dirstate::parsers::parse_dirstate_parents(&dirstate)?
         };
@@ -317,10 +329,30 @@
             self.dirstate_parents.set(DirstateParents::NULL);
             Ok((identity, None, 0))
         } else {
-            let docket =
-                crate::dirstate_tree::on_disk::read_docket(&dirstate)?;
-            self.dirstate_parents.set(docket.parents());
-            Ok((identity, Some(docket.uuid.to_owned()), docket.data_size()))
+            let docket_res =
+                crate::dirstate_tree::on_disk::read_docket(&dirstate);
+            match docket_res {
+                Ok(docket) => {
+                    self.dirstate_parents.set(docket.parents());
+                    Ok((
+                        identity,
+                        Some(docket.uuid.to_owned()),
+                        docket.data_size(),
+                    ))
+                }
+                Err(_) => {
+                    log::info!(
+                        "Parsing dirstate docket failed, \
+                        falling back to dirstate-v1"
+                    );
+                    let parents =
+                        *crate::dirstate::parsers::parse_dirstate_parents(
+                            &dirstate,
+                        )?;
+                    self.dirstate_parents.set(parents);
+                    Ok((identity, None, 0))
+                }
+            }
         }
     }
 
@@ -352,7 +384,13 @@
                             );
                             continue;
                         }
-                        _ => return Err(e),
+                        _ => {
+                            log::info!(
+                                "Reading dirstate v2 failed, \
+                                falling back to v1"
+                            );
+                            return self.new_dirstate_map_v1();
+                        }
                     },
                 }
             }
@@ -363,23 +401,22 @@
             );
             Err(DirstateError::Common(error))
         } else {
-            debug_wait_for_file_or_print(
-                self.config(),
-                "dirstate.pre-read-file",
-            );
-            let identity = self.dirstate_identity()?;
-            let dirstate_file_contents = self.dirstate_file_contents()?;
-            if dirstate_file_contents.is_empty() {
-                self.dirstate_parents.set(DirstateParents::NULL);
-                Ok(OwningDirstateMap::new_empty(Vec::new()))
-            } else {
-                let (map, parents) = OwningDirstateMap::new_v1(
-                    dirstate_file_contents,
-                    identity,
-                )?;
-                self.dirstate_parents.set(parents);
-                Ok(map)
-            }
+            self.new_dirstate_map_v1()
+        }
+    }
+
+    fn new_dirstate_map_v1(&self) -> Result<OwningDirstateMap, DirstateError> {
+        debug_wait_for_file_or_print(self.config(), "dirstate.pre-read-file");
+        let identity = self.dirstate_identity()?;
+        let dirstate_file_contents = self.dirstate_file_contents()?;
+        if dirstate_file_contents.is_empty() {
+            self.dirstate_parents.set(DirstateParents::NULL);
+            Ok(OwningDirstateMap::new_empty(Vec::new()))
+        } else {
+            let (map, parents) =
+                OwningDirstateMap::new_v1(dirstate_file_contents, identity)?;
+            self.dirstate_parents.set(parents);
+            Ok(map)
         }
     }
 
--- a/tests/test-dirstate-version-fallback.t	Mon Jun 05 17:29:52 2023 +0200
+++ b/tests/test-dirstate-version-fallback.t	Mon Jun 05 16:43:27 2023 +0200
@@ -20,7 +20,7 @@
 
 Upgrade it to v2
 
-  $ hg debugupgraderepo -q --config format.use-dirstate-v2=1 --run | grep added
+  $ hg debugupgraderepo -q --config format.use-dirstate-v2=1 --run | egrep 'added:|removed:'
      added: dirstate-v2
   $ hg debugrequires | grep dirstate
   dirstate-v2
@@ -35,9 +35,15 @@
 
 There should be no errors, but a v2 dirstate should be written back to disk
   $ hg st
-  abort: dirstate-v2 parse error: when reading docket, Expected at least * bytes, got * (glob) (known-bad-output !)
-  [255]
   $ ls -1 .hg/dirstate*
   .hg/dirstate
-  .hg/dirstate.* (glob) (missing-correct-output !)
+  .hg/dirstate.* (glob)
+
+Corrupt the dirstate to see how the errors show up to the user
+  $ echo "I ate your data" > .hg/dirstate
 
+  $ hg st
+  abort: working directory state appears damaged! (no-rhg !)
+  (falling back to dirstate-v1 from v2 also failed) (no-rhg !)
+  abort: Too little data for dirstate. (rhg !)
+  [255]