changeset 46443:43d63979a75e

rust: use HgError in RevlogError and Vfs Differential Revision: https://phab.mercurial-scm.org/D9897
author Simon Sapin <simon.sapin@octobus.net>
date Wed, 27 Jan 2021 14:45:25 +0100
parents 02d3bb972121
children 6c778d20c8c2
files rust/hg-core/src/errors.rs rust/hg-core/src/lib.rs rust/hg-core/src/operations/cat.rs rust/hg-core/src/operations/list_tracked_files.rs rust/hg-core/src/repo.rs rust/hg-core/src/requirements.rs rust/hg-core/src/revlog/changelog.rs rust/hg-core/src/revlog/index.rs rust/hg-core/src/revlog/node.rs rust/hg-core/src/revlog/nodemap_docket.rs rust/hg-core/src/revlog/revlog.rs rust/rhg/src/error.rs
diffstat 12 files changed, 111 insertions(+), 114 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/errors.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/errors.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -35,6 +35,9 @@
 
 impl HgError {
     pub fn corrupted(explanation: impl Into<String>) -> Self {
+        // TODO: capture a backtrace here and keep it in the error value
+        // to aid debugging?
+        // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html
         HgError::CorruptedRepository(explanation.into())
     }
 }
--- a/rust/hg-core/src/lib.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/lib.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -3,6 +3,7 @@
 //
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
+
 mod ancestors;
 pub mod dagops;
 pub mod errors;
--- a/rust/hg-core/src/operations/cat.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/operations/cat.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -33,8 +33,8 @@
     let changelog = Changelog::open(repo)?;
     let manifest = Manifest::open(repo)?;
     let changelog_entry = changelog.get_rev(rev)?;
-    let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
-        .map_err(|_| RevlogError::Corrupted)?;
+    let manifest_node =
+        Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
     let manifest_entry = manifest.get_node(manifest_node.into())?;
     let mut bytes = vec![];
 
@@ -46,8 +46,7 @@
 
                 let file_log =
                     Revlog::open(repo, &index_path, Some(&data_path))?;
-                let file_node = Node::from_hex(node_bytes)
-                    .map_err(|_| RevlogError::Corrupted)?;
+                let file_node = Node::from_hex_for_repo(node_bytes)?;
                 let file_rev = file_log.get_node_rev(file_node.into())?;
                 let data = file_log.get_rev_data(file_rev)?;
                 if data.starts_with(&METADATA_DELIMITER) {
--- a/rust/hg-core/src/operations/list_tracked_files.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/operations/list_tracked_files.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -6,7 +6,7 @@
 // GNU General Public License version 2 or any later version.
 
 use crate::dirstate::parsers::parse_dirstate;
-use crate::errors::{HgError, IoResultExt};
+use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::changelog::Changelog;
 use crate::revlog::manifest::{Manifest, ManifestEntry};
@@ -25,12 +25,7 @@
 
 impl Dirstate {
     pub fn new(repo: &Repo) -> Result<Self, HgError> {
-        let content = repo
-            .hg_vfs()
-            .read("dirstate")
-            // TODO: this will be more accurate when we use `HgError` in
-            // `Vfs::read`.
-            .for_file("dirstate".as_ref())?;
+        let content = repo.hg_vfs().read("dirstate")?;
         Ok(Self { content })
     }
 
@@ -57,8 +52,8 @@
     let changelog = Changelog::open(repo)?;
     let manifest = Manifest::open(repo)?;
     let changelog_entry = changelog.get_rev(rev)?;
-    let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
-        .map_err(|_| RevlogError::Corrupted)?;
+    let manifest_node =
+        Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
     let manifest_entry = manifest.get_node(manifest_node.into())?;
     Ok(FilesForRev(manifest_entry))
 }
--- a/rust/hg-core/src/repo.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/repo.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -1,4 +1,4 @@
-use crate::errors::HgError;
+use crate::errors::{HgError, IoResultExt};
 use crate::operations::{find_root, FindRootError};
 use crate::requirements;
 use memmap::{Mmap, MmapOptions};
@@ -68,24 +68,19 @@
     pub(crate) fn read(
         &self,
         relative_path: impl AsRef<Path>,
-    ) -> std::io::Result<Vec<u8>> {
-        std::fs::read(self.base.join(relative_path))
-    }
-
-    pub(crate) fn open(
-        &self,
-        relative_path: impl AsRef<Path>,
-    ) -> std::io::Result<std::fs::File> {
-        std::fs::File::open(self.base.join(relative_path))
+    ) -> Result<Vec<u8>, HgError> {
+        let path = self.base.join(relative_path);
+        std::fs::read(&path).for_file(&path)
     }
 
     pub(crate) fn mmap_open(
         &self,
         relative_path: impl AsRef<Path>,
-    ) -> std::io::Result<Mmap> {
-        let file = self.open(relative_path)?;
+    ) -> Result<Mmap, HgError> {
+        let path = self.base.join(relative_path);
+        let file = std::fs::File::open(&path).for_file(&path)?;
         // TODO: what are the safety requirements here?
-        let mmap = unsafe { MmapOptions::new().map(&file) }?;
+        let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?;
         Ok(mmap)
     }
 }
--- a/rust/hg-core/src/requirements.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/requirements.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -1,4 +1,4 @@
-use crate::errors::{HgError, HgResultExt, IoResultExt};
+use crate::errors::{HgError, HgResultExt};
 use crate::repo::Repo;
 
 fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> {
@@ -22,11 +22,8 @@
 }
 
 pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> {
-    if let Some(bytes) = repo
-        .hg_vfs()
-        .read("requires")
-        .for_file("requires".as_ref())
-        .io_not_found_as_none()?
+    if let Some(bytes) =
+        repo.hg_vfs().read("requires").io_not_found_as_none()?
     {
         parse(&bytes)
     } else {
--- a/rust/hg-core/src/revlog/changelog.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/changelog.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -1,3 +1,4 @@
+use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::revlog::{Revlog, RevlogError};
 use crate::revlog::NodePrefix;
@@ -53,6 +54,8 @@
     /// Return the node id of the `manifest` referenced by this `changelog`
     /// entry.
     pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
-        self.lines().next().ok_or(RevlogError::Corrupted)
+        self.lines()
+            .next()
+            .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
     }
 }
--- a/rust/hg-core/src/revlog/index.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/index.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -3,6 +3,7 @@
 
 use byteorder::{BigEndian, ByteOrder};
 
+use crate::errors::HgError;
 use crate::revlog::node::Node;
 use crate::revlog::revlog::RevlogError;
 use crate::revlog::{Revision, NULL_REVISION};
@@ -44,7 +45,8 @@
                     offsets: Some(offsets),
                 })
             } else {
-                Err(RevlogError::Corrupted)
+                Err(HgError::corrupted("unexpected inline revlog length")
+                    .into())
             }
         } else {
             Ok(Self {
--- a/rust/hg-core/src/revlog/node.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/node.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -8,6 +8,7 @@
 //! In Mercurial code base, it is customary to call "a node" the binary SHA
 //! of a revision.
 
+use crate::errors::HgError;
 use bytes_cast::BytesCast;
 use std::convert::{TryFrom, TryInto};
 use std::fmt;
@@ -136,6 +137,19 @@
         }
     }
 
+    /// `from_hex`, but for input from an internal file of the repository such
+    /// as a changelog or manifest entry.
+    ///
+    /// An error is treated as repository corruption.
+    pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
+        Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
+            HgError::CorruptedRepository(format!(
+                "Expected a full hexadecimal node ID, found {}",
+                String::from_utf8_lossy(hex.as_ref())
+            ))
+        })
+    }
+
     /// Provide access to binary data
     ///
     /// This is needed by FFI layers, for instance to return expected
--- a/rust/hg-core/src/revlog/nodemap_docket.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/nodemap_docket.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -1,3 +1,4 @@
+use crate::errors::{HgError, HgResultExt};
 use bytes_cast::{unaligned, BytesCast};
 use memmap::Mmap;
 use std::path::{Path, PathBuf};
@@ -38,12 +39,12 @@
         index_path: &Path,
     ) -> Result<Option<(Self, Mmap)>, RevlogError> {
         let docket_path = index_path.with_extension("n");
-        let docket_bytes = match repo.store_vfs().read(&docket_path) {
-            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
-                return Ok(None)
-            }
-            Err(e) => return Err(RevlogError::IoError(e)),
-            Ok(bytes) => bytes,
+        let docket_bytes = if let Some(bytes) =
+            repo.store_vfs().read(&docket_path).io_not_found_as_none()?
+        {
+            bytes
+        } else {
+            return Ok(None);
         };
 
         let input = if let Some((&ONDISK_VERSION, rest)) =
@@ -54,36 +55,40 @@
             return Ok(None);
         };
 
-        let (header, rest) = DocketHeader::from_bytes(input)?;
+        /// Treat any error as a parse error
+        fn parse<T, E>(result: Result<T, E>) -> Result<T, RevlogError> {
+            result.map_err(|_| {
+                HgError::corrupted("nodemap docket parse error").into()
+            })
+        }
+
+        let (header, rest) = parse(DocketHeader::from_bytes(input))?;
         let uid_size = header.uid_size as usize;
         // TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
         // systems?
         let tip_node_size = header.tip_node_size.get() as usize;
         let data_length = header.data_length.get() as usize;
-        let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?;
-        let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?;
-        let uid =
-            std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
+        let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
+        let (_tip_node, _rest) =
+            parse(u8::slice_from_bytes(rest, tip_node_size))?;
+        let uid = parse(std::str::from_utf8(uid))?;
         let docket = NodeMapDocket { data_length };
 
         let data_path = rawdata_path(&docket_path, uid);
-        // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
+        // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
         // config is false?
-        match repo.store_vfs().mmap_open(&data_path) {
-            Ok(mmap) => {
-                if mmap.len() >= data_length {
-                    Ok(Some((docket, mmap)))
-                } else {
-                    Err(RevlogError::Corrupted)
-                }
+        if let Some(mmap) = repo
+            .store_vfs()
+            .mmap_open(&data_path)
+            .io_not_found_as_none()?
+        {
+            if mmap.len() >= data_length {
+                Ok(Some((docket, mmap)))
+            } else {
+                Err(HgError::corrupted("persistent nodemap too short").into())
             }
-            Err(error) => {
-                if error.kind() == std::io::ErrorKind::NotFound {
-                    Ok(None)
-                } else {
-                    Err(RevlogError::IoError(error))
-                }
-            }
+        } else {
+            Ok(None)
         }
     }
 }
--- a/rust/hg-core/src/revlog/revlog.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/revlog.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -13,25 +13,34 @@
 use super::index::Index;
 use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
 use super::nodemap;
-use super::nodemap::NodeMap;
+use super::nodemap::{NodeMap, NodeMapError};
 use super::nodemap_docket::NodeMapDocket;
 use super::patch;
+use crate::errors::HgError;
 use crate::repo::Repo;
 use crate::revlog::Revision;
 
+#[derive(derive_more::From)]
 pub enum RevlogError {
-    IoError(std::io::Error),
-    UnsuportedVersion(u16),
     InvalidRevision,
     /// Found more than one entry whose ID match the requested prefix
     AmbiguousPrefix,
-    Corrupted,
-    UnknowDataFormat(u8),
+    #[from]
+    Other(HgError),
 }
 
-impl From<bytes_cast::FromBytesError> for RevlogError {
-    fn from(_: bytes_cast::FromBytesError) -> Self {
-        RevlogError::Corrupted
+impl From<NodeMapError> for RevlogError {
+    fn from(error: NodeMapError) -> Self {
+        match error {
+            NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
+            NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
+        }
+    }
+}
+
+impl RevlogError {
+    fn corrupted() -> Self {
+        RevlogError::Other(HgError::corrupted("corrupted revlog"))
     }
 }
 
@@ -59,14 +68,12 @@
         data_path: Option<&Path>,
     ) -> Result<Self, RevlogError> {
         let index_path = index_path.as_ref();
-        let index_mmap = repo
-            .store_vfs()
-            .mmap_open(&index_path)
-            .map_err(RevlogError::IoError)?;
+        let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
 
         let version = get_version(&index_mmap);
         if version != 1 {
-            return Err(RevlogError::UnsuportedVersion(version));
+            // A proper new version should have had a repo/store requirement.
+            return Err(RevlogError::corrupted());
         }
 
         let index = Index::new(Box::new(index_mmap))?;
@@ -80,10 +87,7 @@
                 None
             } else {
                 let data_path = data_path.unwrap_or(&default_data_path);
-                let data_mmap = repo
-                    .store_vfs()
-                    .mmap_open(data_path)
-                    .map_err(RevlogError::IoError)?;
+                let data_mmap = repo.store_vfs().mmap_open(data_path)?;
                 Some(Box::new(data_mmap))
             };
 
@@ -121,9 +125,7 @@
     ) -> Result<Revision, RevlogError> {
         if let Some(nodemap) = &self.nodemap {
             return nodemap
-                .find_bin(&self.index, node)
-                // TODO: propagate details of this error:
-                .map_err(|_| RevlogError::Corrupted)?
+                .find_bin(&self.index, node)?
                 .ok_or(RevlogError::InvalidRevision);
         }
 
@@ -136,7 +138,9 @@
         let mut found_by_prefix = None;
         for rev in (0..self.len() as Revision).rev() {
             let index_entry =
-                self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
+                self.index.get_entry(rev).ok_or(HgError::corrupted(
+                    "revlog references a revision not in the index",
+                ))?;
             if node == *index_entry.hash() {
                 return Ok(rev);
             }
@@ -167,8 +171,9 @@
         let mut delta_chain = vec![];
         while let Some(base_rev) = entry.base_rev {
             delta_chain.push(entry);
-            entry =
-                self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
+            entry = self
+                .get_entry(base_rev)
+                .map_err(|_| RevlogError::corrupted())?;
         }
 
         // TODO do not look twice in the index
@@ -191,7 +196,7 @@
         ) {
             Ok(data)
         } else {
-            Err(RevlogError::Corrupted)
+            Err(RevlogError::corrupted())
         }
     }
 
@@ -301,7 +306,8 @@
             b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
             // zstd data.
             b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
-            format_type => Err(RevlogError::UnknowDataFormat(format_type)),
+            // A proper new format should have had a repo/store requirement.
+            _format_type => Err(RevlogError::corrupted()),
         }
     }
 
@@ -311,13 +317,13 @@
             let mut buf = Vec::with_capacity(self.compressed_len);
             decoder
                 .read_to_end(&mut buf)
-                .or(Err(RevlogError::Corrupted))?;
+                .map_err(|_| RevlogError::corrupted())?;
             Ok(buf)
         } else {
             let mut buf = vec![0; self.uncompressed_len];
             decoder
                 .read_exact(&mut buf)
-                .or(Err(RevlogError::Corrupted))?;
+                .map_err(|_| RevlogError::corrupted())?;
             Ok(buf)
         }
     }
@@ -326,14 +332,14 @@
         if self.is_delta() {
             let mut buf = Vec::with_capacity(self.compressed_len);
             zstd::stream::copy_decode(self.bytes, &mut buf)
-                .or(Err(RevlogError::Corrupted))?;
+                .map_err(|_| RevlogError::corrupted())?;
             Ok(buf)
         } else {
             let mut buf = vec![0; self.uncompressed_len];
             let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
-                .or(Err(RevlogError::Corrupted))?;
+                .map_err(|_| RevlogError::corrupted())?;
             if len != self.uncompressed_len {
-                Err(RevlogError::Corrupted)
+                Err(RevlogError::corrupted())
             } else {
                 Ok(buf)
             }
--- a/rust/rhg/src/error.rs	Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/rhg/src/error.rs	Wed Jan 27 14:45:25 2021 +0100
@@ -103,9 +103,6 @@
 impl From<(RevlogError, &str)> for CommandError {
     fn from((err, rev): (RevlogError, &str)) -> CommandError {
         match err {
-            RevlogError::IoError(err) => CommandError::Abort(Some(
-                utf8_to_local(&format!("abort: {}\n", err)).into(),
-            )),
             RevlogError::InvalidRevision => CommandError::Abort(Some(
                 utf8_to_local(&format!(
                     "abort: invalid revision identifier {}\n",
@@ -120,27 +117,7 @@
                 ))
                 .into(),
             )),
-            RevlogError::UnsuportedVersion(version) => {
-                CommandError::Abort(Some(
-                    utf8_to_local(&format!(
-                        "abort: unsupported revlog version {}\n",
-                        version
-                    ))
-                    .into(),
-                ))
-            }
-            RevlogError::Corrupted => {
-                CommandError::Abort(Some("abort: corrupted revlog\n".into()))
-            }
-            RevlogError::UnknowDataFormat(format) => {
-                CommandError::Abort(Some(
-                    utf8_to_local(&format!(
-                        "abort: unknow revlog dataformat {:?}\n",
-                        format
-                    ))
-                    .into(),
-                ))
-            }
+            RevlogError::Other(err) => CommandError::Other(err),
         }
     }
 }