rust: use HgError in RevlogError and Vfs
Differential Revision: https://phab.mercurial-scm.org/D9897
--- a/rust/hg-core/src/errors.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/errors.rs Wed Jan 27 14:45:25 2021 +0100
@@ -35,6 +35,9 @@
impl HgError {
pub fn corrupted(explanation: impl Into<String>) -> Self {
+ // TODO: capture a backtrace here and keep it in the error value
+ // to aid debugging?
+ // https://doc.rust-lang.org/std/backtrace/struct.Backtrace.html
HgError::CorruptedRepository(explanation.into())
}
}
--- a/rust/hg-core/src/lib.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/lib.rs Wed Jan 27 14:45:25 2021 +0100
@@ -3,6 +3,7 @@
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
+
mod ancestors;
pub mod dagops;
pub mod errors;
--- a/rust/hg-core/src/operations/cat.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/operations/cat.rs Wed Jan 27 14:45:25 2021 +0100
@@ -33,8 +33,8 @@
let changelog = Changelog::open(repo)?;
let manifest = Manifest::open(repo)?;
let changelog_entry = changelog.get_rev(rev)?;
- let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
- .map_err(|_| RevlogError::Corrupted)?;
+ let manifest_node =
+ Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
let manifest_entry = manifest.get_node(manifest_node.into())?;
let mut bytes = vec![];
@@ -46,8 +46,7 @@
let file_log =
Revlog::open(repo, &index_path, Some(&data_path))?;
- let file_node = Node::from_hex(node_bytes)
- .map_err(|_| RevlogError::Corrupted)?;
+ let file_node = Node::from_hex_for_repo(node_bytes)?;
let file_rev = file_log.get_node_rev(file_node.into())?;
let data = file_log.get_rev_data(file_rev)?;
if data.starts_with(&METADATA_DELIMITER) {
--- a/rust/hg-core/src/operations/list_tracked_files.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/operations/list_tracked_files.rs Wed Jan 27 14:45:25 2021 +0100
@@ -6,7 +6,7 @@
// GNU General Public License version 2 or any later version.
use crate::dirstate::parsers::parse_dirstate;
-use crate::errors::{HgError, IoResultExt};
+use crate::errors::HgError;
use crate::repo::Repo;
use crate::revlog::changelog::Changelog;
use crate::revlog::manifest::{Manifest, ManifestEntry};
@@ -25,12 +25,7 @@
impl Dirstate {
pub fn new(repo: &Repo) -> Result<Self, HgError> {
- let content = repo
- .hg_vfs()
- .read("dirstate")
- // TODO: this will be more accurate when we use `HgError` in
- // `Vfs::read`.
- .for_file("dirstate".as_ref())?;
+ let content = repo.hg_vfs().read("dirstate")?;
Ok(Self { content })
}
@@ -57,8 +52,8 @@
let changelog = Changelog::open(repo)?;
let manifest = Manifest::open(repo)?;
let changelog_entry = changelog.get_rev(rev)?;
- let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
- .map_err(|_| RevlogError::Corrupted)?;
+ let manifest_node =
+ Node::from_hex_for_repo(&changelog_entry.manifest_node()?)?;
let manifest_entry = manifest.get_node(manifest_node.into())?;
Ok(FilesForRev(manifest_entry))
}
--- a/rust/hg-core/src/repo.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/repo.rs Wed Jan 27 14:45:25 2021 +0100
@@ -1,4 +1,4 @@
-use crate::errors::HgError;
+use crate::errors::{HgError, IoResultExt};
use crate::operations::{find_root, FindRootError};
use crate::requirements;
use memmap::{Mmap, MmapOptions};
@@ -68,24 +68,19 @@
pub(crate) fn read(
&self,
relative_path: impl AsRef<Path>,
- ) -> std::io::Result<Vec<u8>> {
- std::fs::read(self.base.join(relative_path))
- }
-
- pub(crate) fn open(
- &self,
- relative_path: impl AsRef<Path>,
- ) -> std::io::Result<std::fs::File> {
- std::fs::File::open(self.base.join(relative_path))
+ ) -> Result<Vec<u8>, HgError> {
+ let path = self.base.join(relative_path);
+ std::fs::read(&path).for_file(&path)
}
pub(crate) fn mmap_open(
&self,
relative_path: impl AsRef<Path>,
- ) -> std::io::Result<Mmap> {
- let file = self.open(relative_path)?;
+ ) -> Result<Mmap, HgError> {
+ let path = self.base.join(relative_path);
+ let file = std::fs::File::open(&path).for_file(&path)?;
// TODO: what are the safety requirements here?
- let mmap = unsafe { MmapOptions::new().map(&file) }?;
+ let mmap = unsafe { MmapOptions::new().map(&file) }.for_file(&path)?;
Ok(mmap)
}
}
--- a/rust/hg-core/src/requirements.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/requirements.rs Wed Jan 27 14:45:25 2021 +0100
@@ -1,4 +1,4 @@
-use crate::errors::{HgError, HgResultExt, IoResultExt};
+use crate::errors::{HgError, HgResultExt};
use crate::repo::Repo;
fn parse(bytes: &[u8]) -> Result<Vec<String>, HgError> {
@@ -22,11 +22,8 @@
}
pub fn load(repo: &Repo) -> Result<Vec<String>, HgError> {
- if let Some(bytes) = repo
- .hg_vfs()
- .read("requires")
- .for_file("requires".as_ref())
- .io_not_found_as_none()?
+ if let Some(bytes) =
+ repo.hg_vfs().read("requires").io_not_found_as_none()?
{
parse(&bytes)
} else {
--- a/rust/hg-core/src/revlog/changelog.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/changelog.rs Wed Jan 27 14:45:25 2021 +0100
@@ -1,3 +1,4 @@
+use crate::errors::HgError;
use crate::repo::Repo;
use crate::revlog::revlog::{Revlog, RevlogError};
use crate::revlog::NodePrefix;
@@ -53,6 +54,8 @@
/// Return the node id of the `manifest` referenced by this `changelog`
/// entry.
pub fn manifest_node(&self) -> Result<&[u8], RevlogError> {
- self.lines().next().ok_or(RevlogError::Corrupted)
+ self.lines()
+ .next()
+ .ok_or_else(|| HgError::corrupted("empty changelog entry").into())
}
}
--- a/rust/hg-core/src/revlog/index.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/index.rs Wed Jan 27 14:45:25 2021 +0100
@@ -3,6 +3,7 @@
use byteorder::{BigEndian, ByteOrder};
+use crate::errors::HgError;
use crate::revlog::node::Node;
use crate::revlog::revlog::RevlogError;
use crate::revlog::{Revision, NULL_REVISION};
@@ -44,7 +45,8 @@
offsets: Some(offsets),
})
} else {
- Err(RevlogError::Corrupted)
+ Err(HgError::corrupted("unexpected inline revlog length")
+ .into())
}
} else {
Ok(Self {
--- a/rust/hg-core/src/revlog/node.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/node.rs Wed Jan 27 14:45:25 2021 +0100
@@ -8,6 +8,7 @@
//! In Mercurial code base, it is customary to call "a node" the binary SHA
//! of a revision.
+use crate::errors::HgError;
use bytes_cast::BytesCast;
use std::convert::{TryFrom, TryInto};
use std::fmt;
@@ -136,6 +137,19 @@
}
}
+ /// `from_hex`, but for input from an internal file of the repository such
+ /// as a changelog or manifest entry.
+ ///
+ /// An error is treated as repository corruption.
+ pub fn from_hex_for_repo(hex: impl AsRef<[u8]>) -> Result<Node, HgError> {
+ Self::from_hex(hex.as_ref()).map_err(|FromHexError| {
+ HgError::CorruptedRepository(format!(
+ "Expected a full hexadecimal node ID, found {}",
+ String::from_utf8_lossy(hex.as_ref())
+ ))
+ })
+ }
+
/// Provide access to binary data
///
/// This is needed by FFI layers, for instance to return expected
--- a/rust/hg-core/src/revlog/nodemap_docket.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/nodemap_docket.rs Wed Jan 27 14:45:25 2021 +0100
@@ -1,3 +1,4 @@
+use crate::errors::{HgError, HgResultExt};
use bytes_cast::{unaligned, BytesCast};
use memmap::Mmap;
use std::path::{Path, PathBuf};
@@ -38,12 +39,12 @@
index_path: &Path,
) -> Result<Option<(Self, Mmap)>, RevlogError> {
let docket_path = index_path.with_extension("n");
- let docket_bytes = match repo.store_vfs().read(&docket_path) {
- Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
- return Ok(None)
- }
- Err(e) => return Err(RevlogError::IoError(e)),
- Ok(bytes) => bytes,
+ let docket_bytes = if let Some(bytes) =
+ repo.store_vfs().read(&docket_path).io_not_found_as_none()?
+ {
+ bytes
+ } else {
+ return Ok(None);
};
let input = if let Some((&ONDISK_VERSION, rest)) =
@@ -54,36 +55,40 @@
return Ok(None);
};
- let (header, rest) = DocketHeader::from_bytes(input)?;
+ /// Treat any error as a parse error
+ fn parse<T, E>(result: Result<T, E>) -> Result<T, RevlogError> {
+ result.map_err(|_| {
+ HgError::corrupted("nodemap docket parse error").into()
+ })
+ }
+
+ let (header, rest) = parse(DocketHeader::from_bytes(input))?;
let uid_size = header.uid_size as usize;
// TODO: do we care about overflow for 4 GB+ nodemap files on 32-bit
// systems?
let tip_node_size = header.tip_node_size.get() as usize;
let data_length = header.data_length.get() as usize;
- let (uid, rest) = u8::slice_from_bytes(rest, uid_size)?;
- let (_tip_node, _rest) = u8::slice_from_bytes(rest, tip_node_size)?;
- let uid =
- std::str::from_utf8(uid).map_err(|_| RevlogError::Corrupted)?;
+ let (uid, rest) = parse(u8::slice_from_bytes(rest, uid_size))?;
+ let (_tip_node, _rest) =
+ parse(u8::slice_from_bytes(rest, tip_node_size))?;
+ let uid = parse(std::str::from_utf8(uid))?;
let docket = NodeMapDocket { data_length };
let data_path = rawdata_path(&docket_path, uid);
- // TODO: use `std::fs::read` here when the `persistent-nodemap.mmap`
+ // TODO: use `vfs.read()` here when the `persistent-nodemap.mmap`
// config is false?
- match repo.store_vfs().mmap_open(&data_path) {
- Ok(mmap) => {
- if mmap.len() >= data_length {
- Ok(Some((docket, mmap)))
- } else {
- Err(RevlogError::Corrupted)
- }
+ if let Some(mmap) = repo
+ .store_vfs()
+ .mmap_open(&data_path)
+ .io_not_found_as_none()?
+ {
+ if mmap.len() >= data_length {
+ Ok(Some((docket, mmap)))
+ } else {
+ Err(HgError::corrupted("persistent nodemap too short").into())
}
- Err(error) => {
- if error.kind() == std::io::ErrorKind::NotFound {
- Ok(None)
- } else {
- Err(RevlogError::IoError(error))
- }
- }
+ } else {
+ Ok(None)
}
}
}
--- a/rust/hg-core/src/revlog/revlog.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/hg-core/src/revlog/revlog.rs Wed Jan 27 14:45:25 2021 +0100
@@ -13,25 +13,34 @@
use super::index::Index;
use super::node::{NodePrefix, NODE_BYTES_LENGTH, NULL_NODE};
use super::nodemap;
-use super::nodemap::NodeMap;
+use super::nodemap::{NodeMap, NodeMapError};
use super::nodemap_docket::NodeMapDocket;
use super::patch;
+use crate::errors::HgError;
use crate::repo::Repo;
use crate::revlog::Revision;
+#[derive(derive_more::From)]
pub enum RevlogError {
- IoError(std::io::Error),
- UnsuportedVersion(u16),
InvalidRevision,
/// Found more than one entry whose ID match the requested prefix
AmbiguousPrefix,
- Corrupted,
- UnknowDataFormat(u8),
+ #[from]
+ Other(HgError),
}
-impl From<bytes_cast::FromBytesError> for RevlogError {
- fn from(_: bytes_cast::FromBytesError) -> Self {
- RevlogError::Corrupted
+impl From<NodeMapError> for RevlogError {
+ fn from(error: NodeMapError) -> Self {
+ match error {
+ NodeMapError::MultipleResults => RevlogError::AmbiguousPrefix,
+ NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
+ }
+ }
+}
+
+impl RevlogError {
+ fn corrupted() -> Self {
+ RevlogError::Other(HgError::corrupted("corrupted revlog"))
}
}
@@ -59,14 +68,12 @@
data_path: Option<&Path>,
) -> Result<Self, RevlogError> {
let index_path = index_path.as_ref();
- let index_mmap = repo
- .store_vfs()
- .mmap_open(&index_path)
- .map_err(RevlogError::IoError)?;
+ let index_mmap = repo.store_vfs().mmap_open(&index_path)?;
let version = get_version(&index_mmap);
if version != 1 {
- return Err(RevlogError::UnsuportedVersion(version));
+ // A proper new version should have had a repo/store requirement.
+ return Err(RevlogError::corrupted());
}
let index = Index::new(Box::new(index_mmap))?;
@@ -80,10 +87,7 @@
None
} else {
let data_path = data_path.unwrap_or(&default_data_path);
- let data_mmap = repo
- .store_vfs()
- .mmap_open(data_path)
- .map_err(RevlogError::IoError)?;
+ let data_mmap = repo.store_vfs().mmap_open(data_path)?;
Some(Box::new(data_mmap))
};
@@ -121,9 +125,7 @@
) -> Result<Revision, RevlogError> {
if let Some(nodemap) = &self.nodemap {
return nodemap
- .find_bin(&self.index, node)
- // TODO: propagate details of this error:
- .map_err(|_| RevlogError::Corrupted)?
+ .find_bin(&self.index, node)?
.ok_or(RevlogError::InvalidRevision);
}
@@ -136,7 +138,9 @@
let mut found_by_prefix = None;
for rev in (0..self.len() as Revision).rev() {
let index_entry =
- self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
+ self.index.get_entry(rev).ok_or(HgError::corrupted(
+ "revlog references a revision not in the index",
+ ))?;
if node == *index_entry.hash() {
return Ok(rev);
}
@@ -167,8 +171,9 @@
let mut delta_chain = vec![];
while let Some(base_rev) = entry.base_rev {
delta_chain.push(entry);
- entry =
- self.get_entry(base_rev).or(Err(RevlogError::Corrupted))?;
+ entry = self
+ .get_entry(base_rev)
+ .map_err(|_| RevlogError::corrupted())?;
}
// TODO do not look twice in the index
@@ -191,7 +196,7 @@
) {
Ok(data)
} else {
- Err(RevlogError::Corrupted)
+ Err(RevlogError::corrupted())
}
}
@@ -301,7 +306,8 @@
b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
// zstd data.
b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
- format_type => Err(RevlogError::UnknowDataFormat(format_type)),
+ // A proper new format should have had a repo/store requirement.
+ _format_type => Err(RevlogError::corrupted()),
}
}
@@ -311,13 +317,13 @@
let mut buf = Vec::with_capacity(self.compressed_len);
decoder
.read_to_end(&mut buf)
- .or(Err(RevlogError::Corrupted))?;
+ .map_err(|_| RevlogError::corrupted())?;
Ok(buf)
} else {
let mut buf = vec![0; self.uncompressed_len];
decoder
.read_exact(&mut buf)
- .or(Err(RevlogError::Corrupted))?;
+ .map_err(|_| RevlogError::corrupted())?;
Ok(buf)
}
}
@@ -326,14 +332,14 @@
if self.is_delta() {
let mut buf = Vec::with_capacity(self.compressed_len);
zstd::stream::copy_decode(self.bytes, &mut buf)
- .or(Err(RevlogError::Corrupted))?;
+ .map_err(|_| RevlogError::corrupted())?;
Ok(buf)
} else {
let mut buf = vec![0; self.uncompressed_len];
let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
- .or(Err(RevlogError::Corrupted))?;
+ .map_err(|_| RevlogError::corrupted())?;
if len != self.uncompressed_len {
- Err(RevlogError::Corrupted)
+ Err(RevlogError::corrupted())
} else {
Ok(buf)
}
--- a/rust/rhg/src/error.rs Wed Jan 27 14:59:09 2021 +0100
+++ b/rust/rhg/src/error.rs Wed Jan 27 14:45:25 2021 +0100
@@ -103,9 +103,6 @@
impl From<(RevlogError, &str)> for CommandError {
fn from((err, rev): (RevlogError, &str)) -> CommandError {
match err {
- RevlogError::IoError(err) => CommandError::Abort(Some(
- utf8_to_local(&format!("abort: {}\n", err)).into(),
- )),
RevlogError::InvalidRevision => CommandError::Abort(Some(
utf8_to_local(&format!(
"abort: invalid revision identifier {}\n",
@@ -120,27 +117,7 @@
))
.into(),
)),
- RevlogError::UnsuportedVersion(version) => {
- CommandError::Abort(Some(
- utf8_to_local(&format!(
- "abort: unsupported revlog version {}\n",
- version
- ))
- .into(),
- ))
- }
- RevlogError::Corrupted => {
- CommandError::Abort(Some("abort: corrupted revlog\n".into()))
- }
- RevlogError::UnknowDataFormat(format) => {
- CommandError::Abort(Some(
- utf8_to_local(&format!(
- "abort: unknow revlog dataformat {:?}\n",
- format
- ))
- .into(),
- ))
- }
+ RevlogError::Other(err) => CommandError::Other(err),
}
}
}