Mercurial > hg
changeset 50977:1928b770e3e7
rust: use the new `UncheckedRevision` everywhere applicable
This step converts all revisions that shouldn't be considered "valid" in any
context to `UncheckedRevison`, allowing `Revision` to be changed for a
stronger type in a later changeset.
Note that the conversion from unchecked to checked is manual and requires
at least some thought from the programmer, although directly using `Revision`
is still possible. A later changeset will make this mistake harder to make.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 10 Aug 2023 11:00:34 +0200 |
parents | 9929c8a73488 |
children | 27e773aa607d |
files | rust/hg-core/src/operations/cat.rs rust/hg-core/src/operations/debugdata.rs rust/hg-core/src/operations/list_tracked_files.rs rust/hg-core/src/repo.rs rust/hg-core/src/revlog/changelog.rs rust/hg-core/src/revlog/filelog.rs rust/hg-core/src/revlog/index.rs rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/revlog/mod.rs rust/hg-core/src/revlog/nodemap.rs rust/hg-core/src/revset.rs rust/hg-cpython/src/revlog.rs |
diffstat | 12 files changed, 252 insertions(+), 130 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/operations/cat.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/operations/cat.rs Thu Aug 10 11:00:34 2023 +0200 @@ -84,10 +84,10 @@ mut files: Vec<&'a HgPath>, ) -> Result<CatOutput<'a>, RevlogError> { let rev = crate::revset::resolve_single(revset, repo)?; - let manifest = repo.manifest_for_rev(rev)?; + let manifest = repo.manifest_for_rev(rev.into())?; let node = *repo .changelog()? - .node_from_rev(rev) + .node_from_rev(rev.into()) .expect("should succeed when repo.manifest did"); let mut results: Vec<(&'a HgPath, Vec<u8>)> = vec![]; let mut found_any = false;
--- a/rust/hg-core/src/operations/debugdata.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/operations/debugdata.rs Thu Aug 10 11:00:34 2023 +0200 @@ -33,6 +33,6 @@ Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?; let rev = crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?; - let data = revlog.get_rev_data(rev)?; + let data = revlog.get_rev_data_for_checked_rev(rev)?; Ok(data.into_owned()) }
--- a/rust/hg-core/src/operations/list_tracked_files.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/operations/list_tracked_files.rs Thu Aug 10 11:00:34 2023 +0200 @@ -21,7 +21,7 @@ ) -> Result<FilesForRev, RevlogError> { let rev = crate::revset::resolve_single(revset, repo)?; Ok(FilesForRev { - manifest: repo.manifest_for_rev(rev)?, + manifest: repo.manifest_for_rev(rev.into())?, narrow_matcher, }) }
--- a/rust/hg-core/src/repo.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/repo.rs Thu Aug 10 11:00:34 2023 +0200 @@ -15,8 +15,8 @@ use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::{is_dir, is_file, Vfs}; -use crate::{requirements, NodePrefix}; -use crate::{DirstateError, Revision}; +use crate::DirstateError; +use crate::{requirements, NodePrefix, UncheckedRevision}; use std::cell::{Ref, RefCell, RefMut}; use std::collections::HashSet; use std::io::Seek; @@ -562,7 +562,7 @@ /// Returns the manifest of the *changeset* with the given revision number pub fn manifest_for_rev( &self, - revision: Revision, + revision: UncheckedRevision, ) -> Result<Manifest, RevlogError> { self.manifestlog()?.data_for_node( self.changelog()?
--- a/rust/hg-core/src/revlog/changelog.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/revlog/changelog.rs Thu Aug 10 11:00:34 2023 +0200 @@ -4,6 +4,7 @@ use crate::revlog::{Revlog, RevlogEntry, RevlogError}; use crate::utils::hg_path::HgPath; use crate::vfs::Vfs; +use crate::UncheckedRevision; use itertools::Itertools; use std::ascii::escape_default; use std::borrow::Cow; @@ -29,15 +30,24 @@ node: NodePrefix, ) -> Result<ChangelogRevisionData, RevlogError> { let rev = self.revlog.rev_from_node(node)?; - self.data_for_rev(rev) + self.entry_for_checked_rev(rev)?.data() } /// Return the [`ChangelogEntry`] for the given revision number. pub fn entry_for_rev( &self, + rev: UncheckedRevision, + ) -> Result<ChangelogEntry, RevlogError> { + let revlog_entry = self.revlog.get_entry(rev)?; + Ok(ChangelogEntry { revlog_entry }) + } + + /// Same as [`Self::entry_for_rev`] for checked revisions. + fn entry_for_checked_rev( + &self, rev: Revision, ) -> Result<ChangelogEntry, RevlogError> { - let revlog_entry = self.revlog.get_entry(rev)?; + let revlog_entry = self.revlog.get_entry_for_checked_rev(rev)?; Ok(ChangelogEntry { revlog_entry }) } @@ -49,12 +59,12 @@ /// [entry_for_rev](`Self::entry_for_rev`) and doing everything from there. pub fn data_for_rev( &self, - rev: Revision, + rev: UncheckedRevision, ) -> Result<ChangelogRevisionData, RevlogError> { self.entry_for_rev(rev)?.data() } - pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> { + pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { self.revlog.node_from_rev(rev) } @@ -330,12 +340,12 @@ let changelog = Changelog { revlog }; assert_eq!( - changelog.data_for_rev(NULL_REVISION)?, + changelog.data_for_rev(NULL_REVISION.into())?, ChangelogRevisionData::null() ); // same with the intermediate entry object assert_eq!( - changelog.entry_for_rev(NULL_REVISION)?.data()?, + changelog.entry_for_rev(NULL_REVISION.into())?.data()?, ChangelogRevisionData::null() ); Ok(())
--- a/rust/hg-core/src/revlog/filelog.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/revlog/filelog.rs Thu Aug 10 11:00:34 2023 +0200 @@ -1,4 +1,5 @@ use crate::errors::HgError; +use crate::exit_codes; use crate::repo::Repo; use crate::revlog::path_encode::path_encode; use crate::revlog::NodePrefix; @@ -8,6 +9,7 @@ use crate::utils::files::get_path_from_bytes; use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; +use crate::UncheckedRevision; use std::path::PathBuf; /// A specialized `Revlog` to work with file data logs. @@ -39,14 +41,14 @@ file_node: impl Into<NodePrefix>, ) -> Result<FilelogRevisionData, RevlogError> { let file_rev = self.revlog.rev_from_node(file_node.into())?; - self.data_for_rev(file_rev) + self.data_for_rev(file_rev.into()) } /// The given revision is that of the file as found in a filelog, not of a /// changeset. pub fn data_for_rev( &self, - file_rev: Revision, + file_rev: UncheckedRevision, ) -> Result<FilelogRevisionData, RevlogError> { let data: Vec<u8> = self.revlog.get_rev_data(file_rev)?.into_owned(); Ok(FilelogRevisionData(data)) @@ -59,16 +61,25 @@ file_node: impl Into<NodePrefix>, ) -> Result<FilelogEntry, RevlogError> { let file_rev = self.revlog.rev_from_node(file_node.into())?; - self.entry_for_rev(file_rev) + self.entry_for_checked_rev(file_rev) } /// The given revision is that of the file as found in a filelog, not of a /// changeset. pub fn entry_for_rev( &self, + file_rev: UncheckedRevision, + ) -> Result<FilelogEntry, RevlogError> { + Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) + } + + fn entry_for_checked_rev( + &self, file_rev: Revision, ) -> Result<FilelogEntry, RevlogError> { - Ok(FilelogEntry(self.revlog.get_entry(file_rev)?)) + Ok(FilelogEntry( + self.revlog.get_entry_for_checked_rev(file_rev)?, + )) } } @@ -165,7 +176,19 @@ } pub fn data(&self) -> Result<FilelogRevisionData, HgError> { - Ok(FilelogRevisionData(self.0.data()?.into_owned())) + let data = self.0.data(); + match data { + Ok(data) => Ok(FilelogRevisionData(data.into_owned())), + // Errors other than `HgError` should not happen at this point + Err(e) => match e { + RevlogError::Other(hg_error) => Err(hg_error), + revlog_error => Err(HgError::abort( + revlog_error.to_string(), + exit_codes::ABORT, + None, + )), + }, + } } }
--- a/rust/hg-core/src/revlog/index.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/revlog/index.rs Thu Aug 10 11:00:34 2023 +0200 @@ -1,3 +1,4 @@ +use std::fmt::Debug; use std::ops::Deref; use byteorder::{BigEndian, ByteOrder}; @@ -5,6 +6,7 @@ use crate::errors::HgError; use crate::revlog::node::Node; use crate::revlog::{Revision, NULL_REVISION}; +use crate::UncheckedRevision; pub const INDEX_ENTRY_SIZE: usize = 64; @@ -86,6 +88,15 @@ uses_generaldelta: bool, } +impl Debug for Index { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Index") + .field("offsets", &self.offsets) + .field("uses_generaldelta", &self.uses_generaldelta) + .finish() + } +} + impl Index { /// Create an index from bytes. /// Calculate the start of each entry when is_inline is true. @@ -175,36 +186,32 @@ if rev == NULL_REVISION { return None; } - if let Some(offsets) = &self.offsets { + Some(if let Some(offsets) = &self.offsets { self.get_entry_inline(rev, offsets) } else { self.get_entry_separated(rev) - } + }) } fn get_entry_inline( &self, rev: Revision, offsets: &[usize], - ) -> Option<IndexEntry> { - let start = *offsets.get(rev as usize)?; - let end = start.checked_add(INDEX_ENTRY_SIZE)?; + ) -> IndexEntry { + let start = offsets[rev as usize]; + let end = start + INDEX_ENTRY_SIZE; let bytes = &self.bytes[start..end]; // See IndexEntry for an explanation of this override. let offset_override = Some(end); - Some(IndexEntry { + IndexEntry { bytes, offset_override, - }) + } } - fn get_entry_separated(&self, rev: Revision) -> Option<IndexEntry> { - let max_rev = self.bytes.len() / INDEX_ENTRY_SIZE; - if rev as usize >= max_rev { - return None; - } + fn get_entry_separated(&self, rev: Revision) -> IndexEntry { let start = rev as usize * INDEX_ENTRY_SIZE; let end = start + INDEX_ENTRY_SIZE; let bytes = &self.bytes[start..end]; @@ -213,10 +220,10 @@ // for the index's metadata (saving space because it is always 0) let offset_override = if rev == 0 { Some(0) } else { None }; - Some(IndexEntry { + IndexEntry { bytes, offset_override, - }) + } } } @@ -273,23 +280,23 @@ } /// Return the revision upon which the data has been derived. - pub fn base_revision_or_base_of_delta_chain(&self) -> Revision { + pub fn base_revision_or_base_of_delta_chain(&self) -> UncheckedRevision { // TODO Maybe return an Option when base_revision == rev? // Requires to add rev to IndexEntry - BigEndian::read_i32(&self.bytes[16..]) + BigEndian::read_i32(&self.bytes[16..]).into() } - pub fn link_revision(&self) -> Revision { - BigEndian::read_i32(&self.bytes[20..]) + pub fn link_revision(&self) -> UncheckedRevision { + BigEndian::read_i32(&self.bytes[20..]).into() } - pub fn p1(&self) -> Revision { - BigEndian::read_i32(&self.bytes[24..]) + pub fn p1(&self) -> UncheckedRevision { + BigEndian::read_i32(&self.bytes[24..]).into() } - pub fn p2(&self) -> Revision { - BigEndian::read_i32(&self.bytes[28..]) + pub fn p2(&self) -> UncheckedRevision { + BigEndian::read_i32(&self.bytes[28..]).into() } /// Return the hash of revision's full text. @@ -547,7 +554,7 @@ offset_override: None, }; - assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1) + assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1.into()) } #[test] @@ -559,7 +566,7 @@ offset_override: None, }; - assert_eq!(entry.link_revision(), 123); + assert_eq!(entry.link_revision(), 123.into()); } #[test] @@ -571,7 +578,7 @@ offset_override: None, }; - assert_eq!(entry.p1(), 123); + assert_eq!(entry.p1(), 123.into()); } #[test] @@ -583,7 +590,7 @@ offset_override: None, }; - assert_eq!(entry.p2(), 123); + assert_eq!(entry.p2(), 123.into()); } #[test]
--- a/rust/hg-core/src/revlog/manifest.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/revlog/manifest.rs Thu Aug 10 11:00:34 2023 +0200 @@ -1,10 +1,10 @@ use crate::errors::HgError; -use crate::revlog::Revision; use crate::revlog::{Node, NodePrefix}; use crate::revlog::{Revlog, RevlogError}; use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::Vfs; +use crate::{Revision, UncheckedRevision}; /// A specialized `Revlog` to work with `manifest` data format. pub struct Manifestlog { @@ -32,7 +32,7 @@ node: NodePrefix, ) -> Result<Manifest, RevlogError> { let rev = self.revlog.rev_from_node(node)?; - self.data_for_rev(rev) + self.data_for_checked_rev(rev) } /// Return the `Manifest` of a given revision number. @@ -43,9 +43,18 @@ /// See also `Repo::manifest_for_rev` pub fn data_for_rev( &self, + rev: UncheckedRevision, + ) -> Result<Manifest, RevlogError> { + let bytes = self.revlog.get_rev_data(rev)?.into_owned(); + Ok(Manifest { bytes }) + } + + pub fn data_for_checked_rev( + &self, rev: Revision, ) -> Result<Manifest, RevlogError> { - let bytes = self.revlog.get_rev_data(rev)?.into_owned(); + let bytes = + self.revlog.get_rev_data_for_checked_rev(rev)?.into_owned(); Ok(Manifest { bytes }) } }
--- a/rust/hg-core/src/revlog/mod.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/revlog/mod.rs Thu Aug 10 11:00:34 2023 +0200 @@ -47,7 +47,24 @@ /// /// As noted in revlog.c, revision numbers are actually encoded in /// 4 bytes, and are liberally converted to ints, whence the i32 -pub type UncheckedRevision = i32; +#[derive( + Debug, + derive_more::Display, + Clone, + Copy, + Hash, + PartialEq, + Eq, + PartialOrd, + Ord, +)] +pub struct UncheckedRevision(i32); + +impl From<Revision> for UncheckedRevision { + fn from(value: Revision) -> Self { + Self(value) + } +} /// Marker expressing the absence of a parent /// @@ -60,7 +77,8 @@ /// This is also equal to `i32::max_value()`, but it's better to spell /// it out explicitely, same as in `mercurial.node` #[allow(clippy::unreadable_literal)] -pub const WORKING_DIRECTORY_REVISION: Revision = 0x7fffffff; +pub const WORKING_DIRECTORY_REVISION: UncheckedRevision = + UncheckedRevision(0x7fffffff); pub const WORKING_DIRECTORY_HEX: &str = "ffffffffffffffffffffffffffffffffffffffff"; @@ -90,14 +108,14 @@ self.len() == 0 } - /// Return a reference to the Node or `None` if rev is out of bounds - /// - /// `NULL_REVISION` is not considered to be out of bounds. + /// Return a reference to the Node or `None` for `NULL_REVISION` fn node(&self, rev: Revision) -> Option<&Node>; /// Return a [`Revision`] if `rev` is a valid revision number for this /// index fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { + let rev = rev.0; + if rev == NULL_REVISION || (rev >= 0 && (rev as usize) < self.len()) { Some(rev) } else { @@ -120,7 +138,7 @@ const NULL_REVLOG_ENTRY_FLAGS: u16 = 0; -#[derive(Debug, derive_more::From)] +#[derive(Debug, derive_more::From, derive_more::Display)] pub enum RevlogError { InvalidRevision, /// Working directory is not supported @@ -231,10 +249,11 @@ /// Returns the node ID for the given revision number, if it exists in this /// revlog - pub fn node_from_rev(&self, rev: Revision) -> Option<&Node> { - if rev == NULL_REVISION { + pub fn node_from_rev(&self, rev: UncheckedRevision) -> Option<&Node> { + if rev == NULL_REVISION.into() { return Some(&NULL_NODE); } + let rev = self.index.check_revision(rev)?; Some(self.index.get_entry(rev)?.hash()) } @@ -296,8 +315,8 @@ } /// Returns whether the given revision exists in this revlog. - pub fn has_rev(&self, rev: Revision) -> bool { - self.index.get_entry(rev).is_some() + pub fn has_rev(&self, rev: UncheckedRevision) -> bool { + self.index.check_revision(rev).is_some() } /// Return the full data associated to a revision. @@ -307,12 +326,23 @@ /// snapshot to rebuild the final data. pub fn get_rev_data( &self, + rev: UncheckedRevision, + ) -> Result<Cow<[u8]>, RevlogError> { + if rev == NULL_REVISION.into() { + return Ok(Cow::Borrowed(&[])); + }; + self.get_entry(rev)?.data() + } + + /// [`Self::get_rev_data`] for checked revisions. + pub fn get_rev_data_for_checked_rev( + &self, rev: Revision, ) -> Result<Cow<[u8]>, RevlogError> { if rev == NULL_REVISION { return Ok(Cow::Borrowed(&[])); }; - Ok(self.get_entry(rev)?.data()?) + self.get_entry_for_checked_rev(rev)?.data() } /// Check the hash of some given data against the recorded hash. @@ -380,8 +410,7 @@ } } - /// Get an entry of the revlog. - pub fn get_entry( + fn get_entry_for_checked_rev( &self, rev: Revision, ) -> Result<RevlogEntry, RevlogError> { @@ -399,36 +428,60 @@ } else { &self.data()[start..end] }; + let base_rev = self + .index + .check_revision(index_entry.base_revision_or_base_of_delta_chain()) + .ok_or_else(|| { + RevlogError::corrupted(format!( + "base revision for rev {} is invalid", + rev + )) + })?; + let p1 = + self.index.check_revision(index_entry.p1()).ok_or_else(|| { + RevlogError::corrupted(format!( + "p1 for rev {} is invalid", + rev + )) + })?; + let p2 = + self.index.check_revision(index_entry.p2()).ok_or_else(|| { + RevlogError::corrupted(format!( + "p2 for rev {} is invalid", + rev + )) + })?; let entry = RevlogEntry { revlog: self, rev, bytes: data, compressed_len: index_entry.compressed_len(), uncompressed_len: index_entry.uncompressed_len(), - base_rev_or_base_of_delta_chain: if index_entry - .base_revision_or_base_of_delta_chain() - == rev - { + base_rev_or_base_of_delta_chain: if base_rev == rev { None } else { - Some(index_entry.base_revision_or_base_of_delta_chain()) + Some(base_rev) }, - p1: index_entry.p1(), - p2: index_entry.p2(), + p1, + p2, flags: index_entry.flags(), hash: *index_entry.hash(), }; Ok(entry) } - /// when resolving internal references within revlog, any errors - /// should be reported as corruption, instead of e.g. "invalid revision" - fn get_entry_internal( + /// Get an entry of the revlog. + pub fn get_entry( &self, - rev: Revision, - ) -> Result<RevlogEntry, HgError> { - self.get_entry(rev) - .map_err(|_| corrupted(format!("revision {} out of range", rev))) + rev: UncheckedRevision, + ) -> Result<RevlogEntry, RevlogError> { + if rev == NULL_REVISION.into() { + return Ok(self.make_null_entry()); + } + let rev = self.index.check_revision(rev).ok_or_else(|| { + RevlogError::corrupted(format!("rev {} is invalid", rev)) + })?; + self.get_entry_for_checked_rev(rev) } } @@ -486,7 +539,7 @@ if self.p1 == NULL_REVISION { Ok(None) } else { - Ok(Some(self.revlog.get_entry(self.p1)?)) + Ok(Some(self.revlog.get_entry_for_checked_rev(self.p1)?)) } } @@ -496,7 +549,7 @@ if self.p2 == NULL_REVISION { Ok(None) } else { - Ok(Some(self.revlog.get_entry(self.p2)?)) + Ok(Some(self.revlog.get_entry_for_checked_rev(self.p2)?)) } } @@ -527,7 +580,7 @@ } /// The data for this entry, after resolving deltas if any. - pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, HgError> { + pub fn rawdata(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { let mut entry = self.clone(); let mut delta_chain = vec![]; @@ -539,11 +592,11 @@ while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain { entry = if uses_generaldelta { delta_chain.push(entry); - self.revlog.get_entry_internal(base_rev)? + self.revlog.get_entry_for_checked_rev(base_rev)? } else { - let base_rev = entry.rev - 1; + let base_rev = UncheckedRevision(entry.rev - 1); delta_chain.push(entry); - self.revlog.get_entry_internal(base_rev)? + self.revlog.get_entry(base_rev)? }; } @@ -559,7 +612,7 @@ fn check_data( &self, data: Cow<'revlog, [u8]>, - ) -> Result<Cow<'revlog, [u8]>, HgError> { + ) -> Result<Cow<'revlog, [u8]>, RevlogError> { if self.revlog.check_hash( self.p1, self.p2, @@ -571,22 +624,24 @@ if (self.flags & REVISION_FLAG_ELLIPSIS) != 0 { return Err(HgError::unsupported( "ellipsis revisions are not supported by rhg", - )); + ) + .into()); } Err(corrupted(format!( "hash check failed for revision {}", self.rev - ))) + )) + .into()) } } - pub fn data(&self) -> Result<Cow<'revlog, [u8]>, HgError> { + pub fn data(&self) -> Result<Cow<'revlog, [u8]>, RevlogError> { let data = self.rawdata()?; if self.rev == NULL_REVISION { return Ok(data); } if self.is_censored() { - return Err(HgError::CensoredNodeError); + return Err(HgError::CensoredNodeError.into()); } self.check_data(data) } @@ -705,13 +760,13 @@ let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); assert!(revlog.is_empty()); assert_eq!(revlog.len(), 0); - assert!(revlog.get_entry(0).is_err()); - assert!(!revlog.has_rev(0)); + assert!(revlog.get_entry(0.into()).is_err()); + assert!(!revlog.has_rev(0.into())); assert_eq!( revlog.rev_from_node(NULL_NODE.into()).unwrap(), NULL_REVISION ); - let null_entry = revlog.get_entry(NULL_REVISION).ok().unwrap(); + let null_entry = revlog.get_entry(NULL_REVISION.into()).ok().unwrap(); assert_eq!(null_entry.revision(), NULL_REVISION); assert!(null_entry.data().unwrap().is_empty()); } @@ -750,7 +805,7 @@ std::fs::write(temp.path().join("foo.i"), contents).unwrap(); let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); - let entry0 = revlog.get_entry(0).ok().unwrap(); + let entry0 = revlog.get_entry(0.into()).ok().unwrap(); assert_eq!(entry0.revision(), 0); assert_eq!(*entry0.node(), node0); assert!(!entry0.has_p1()); @@ -761,7 +816,7 @@ let p2_entry = entry0.p2_entry().unwrap(); assert!(p2_entry.is_none()); - let entry1 = revlog.get_entry(1).ok().unwrap(); + let entry1 = revlog.get_entry(1.into()).ok().unwrap(); assert_eq!(entry1.revision(), 1); assert_eq!(*entry1.node(), node1); assert!(!entry1.has_p1()); @@ -772,7 +827,7 @@ let p2_entry = entry1.p2_entry().unwrap(); assert!(p2_entry.is_none()); - let entry2 = revlog.get_entry(2).ok().unwrap(); + let entry2 = revlog.get_entry(2.into()).ok().unwrap(); assert_eq!(entry2.revision(), 2); assert_eq!(*entry2.node(), node2); assert!(entry2.has_p1()); @@ -817,7 +872,7 @@ let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); // accessing the data shows the corruption - revlog.get_entry(0).unwrap().data().unwrap_err(); + revlog.get_entry(0.into()).unwrap().data().unwrap_err(); assert_eq!(revlog.rev_from_node(NULL_NODE.into()).unwrap(), -1); assert_eq!(revlog.rev_from_node(node0.into()).unwrap(), 0);
--- a/rust/hg-core/src/revlog/nodemap.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/revlog/nodemap.rs Thu Aug 10 11:00:34 2023 +0200 @@ -12,6 +12,8 @@ //! Following existing implicit conventions, the "nodemap" terminology //! is used in a more abstract context. +use crate::UncheckedRevision; + use super::{ node::NULL_NODE, Node, NodePrefix, Revision, RevlogIndex, NULL_REVISION, }; @@ -30,7 +32,7 @@ /// This can be returned by methods meant for (at most) one match. MultipleResults, /// A `Revision` stored in the nodemap could not be found in the index - RevisionNotInIndex(Revision), + RevisionNotInIndex(UncheckedRevision), } /// Mapping system from Mercurial nodes to revision numbers. @@ -125,7 +127,9 @@ /// use. #[derive(Clone, Debug, Eq, PartialEq)] enum Element { - Rev(Revision), + // This is not a Mercurial revision. It's a `i32` because this is the + // right type for this structure. + Rev(i32), Block(usize), None, } @@ -245,17 +249,21 @@ fn has_prefix_or_none( idx: &impl RevlogIndex, prefix: NodePrefix, - rev: Revision, + rev: UncheckedRevision, ) -> Result<Option<Revision>, NodeMapError> { - idx.node(rev) - .ok_or(NodeMapError::RevisionNotInIndex(rev)) - .map(|node| { - if prefix.is_prefix_of(node) { - Some(rev) - } else { - None - } - }) + match idx.check_revision(rev) { + Some(checked) => idx + .node(checked) + .ok_or(NodeMapError::RevisionNotInIndex(rev)) + .map(|node| { + if prefix.is_prefix_of(node) { + Some(checked) + } else { + None + } + }), + None => Err(NodeMapError::RevisionNotInIndex(rev)), + } } /// validate that the candidate's node starts indeed with given prefix, @@ -266,7 +274,7 @@ fn validate_candidate( idx: &impl RevlogIndex, prefix: NodePrefix, - candidate: (Option<Revision>, usize), + candidate: (Option<UncheckedRevision>, usize), ) -> Result<(Option<Revision>, usize), NodeMapError> { let (rev, steps) = candidate; if let Some(nz_nybble) = prefix.first_different_nybble(&NULL_NODE) { @@ -384,6 +392,8 @@ /// be inferred from /// the `NodeTree` data is that `rev` is the revision with the longest /// common node prefix with the given prefix. + /// We return an [`UncheckedRevision`] because we have no guarantee that + /// the revision we found is valid for the index. /// /// The second returned value is the size of the smallest subprefix /// of `prefix` that would give the same result, i.e. not the @@ -392,7 +402,7 @@ fn lookup( &self, prefix: NodePrefix, - ) -> Result<(Option<Revision>, usize), NodeMapError> { + ) -> Result<(Option<UncheckedRevision>, usize), NodeMapError> { for (i, visit_item) in self.visit(prefix).enumerate() { if let Some(opt) = visit_item.final_revision() { return Ok((opt, i + 1)); @@ -464,9 +474,9 @@ self.mutable_block(deepest.block_idx); if let Element::Rev(old_rev) = deepest.element { - let old_node = index - .node(old_rev) - .ok_or(NodeMapError::RevisionNotInIndex(old_rev))?; + let old_node = index.node(old_rev).ok_or_else(|| { + NodeMapError::RevisionNotInIndex(old_rev.into()) + })?; if old_node == node { return Ok(()); // avoid creating lots of useless blocks } @@ -623,13 +633,13 @@ impl NodeTreeVisitItem { // Return `Some(opt)` if this item is final, with `opt` being the - // `Revision` that it may represent. + // `UncheckedRevision` that it may represent. // // If the item is not terminal, return `None` - fn final_revision(&self) -> Option<Option<Revision>> { + fn final_revision(&self) -> Option<Option<UncheckedRevision>> { match self.element { Element::Block(_) => None, - Element::Rev(r) => Some(Some(r)), + Element::Rev(r) => Some(Some(r.into())), Element::None => Some(None), } } @@ -733,16 +743,20 @@ assert_eq!(block.get(4), Element::Rev(1)); } - type TestIndex = HashMap<Revision, Node>; + type TestIndex = HashMap<UncheckedRevision, Node>; impl RevlogIndex for TestIndex { fn node(&self, rev: Revision) -> Option<&Node> { - self.get(&rev) + self.get(&rev.into()) } fn len(&self) -> usize { self.len() } + + fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { + self.get(&rev).map(|_| rev.0) + } } /// Pad hexadecimal Node prefix with zeros on the right @@ -756,7 +770,7 @@ /// Pad hexadecimal Node prefix with zeros on the right, then insert fn pad_insert(idx: &mut TestIndex, rev: Revision, hex: &str) { - idx.insert(rev, pad_node(hex)); + idx.insert(rev.into(), pad_node(hex)); } fn sample_nodetree() -> NodeTree { @@ -796,7 +810,7 @@ assert_eq!(nt.find_bin(&idx, hex("ab"))?, None); // and with full binary Nodes - assert_eq!(nt.find_node(&idx, idx.get(&1).unwrap())?, Some(1)); + assert_eq!(nt.find_node(&idx, idx.get(&1.into()).unwrap())?, Some(1)); let unknown = Node::from_hex(&hex_pad_right("3d")).unwrap(); assert_eq!(nt.find_node(&idx, &unknown)?, None); Ok(()) @@ -857,14 +871,15 @@ } } - fn insert( - &mut self, - rev: Revision, - hex: &str, - ) -> Result<(), NodeMapError> { + fn insert(&mut self, rev: i32, hex: &str) -> Result<(), NodeMapError> { let node = pad_node(hex); + let rev: UncheckedRevision = rev.into(); self.index.insert(rev, node); - self.nt.insert(&self.index, &node, rev)?; + self.nt.insert( + &self.index, + &node, + self.index.check_revision(rev).unwrap(), + )?; Ok(()) } @@ -971,9 +986,9 @@ let node0 = Node::from_hex(&node0_hex).unwrap(); let node1 = Node::from_hex(&node1_hex).unwrap(); - idx.insert(0, node0); + idx.insert(0.into(), node0); nt.insert(idx, &node0, 0)?; - idx.insert(1, node1); + idx.insert(1.into(), node1); nt.insert(idx, &node1, 1)?; assert_eq!(nt.find_bin(idx, (&node0).into())?, Some(0));
--- a/rust/hg-core/src/revset.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-core/src/revset.rs Thu Aug 10 11:00:34 2023 +0200 @@ -53,7 +53,7 @@ if let Ok(integer) = input.parse::<i32>() { if integer.to_string() == input && integer >= 0 - && revlog.has_rev(integer) + && revlog.has_rev(integer.into()) { return Ok(integer); }
--- a/rust/hg-cpython/src/revlog.rs Mon Sep 11 11:52:33 2023 +0200 +++ b/rust/hg-cpython/src/revlog.rs Thu Aug 10 11:00:34 2023 +0200 @@ -18,7 +18,7 @@ use hg::{ nodemap::{Block, NodeMapError, NodeTree}, revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, - Revision, + Revision, UncheckedRevision, }; use std::cell::RefCell; @@ -252,7 +252,7 @@ // Note that we don't seem to have a direct way to call // PySequence_GetItem (does the job), which would possibly be better // for performance - let key = match key.extract::<Revision>(py) { + let key = match key.extract::<i32>(py) { Ok(rev) => rev.to_py_object(py).into_object(), Err(_) => key, }; @@ -268,7 +268,7 @@ // this is an equivalent implementation of the index_contains() // defined in revlog.c let cindex = self.cindex(py).borrow(); - match item.extract::<Revision>(py) { + match item.extract::<i32>(py) { Ok(rev) => { Ok(rev >= -1 && rev < cindex.inner().len(py)? as Revision) } @@ -448,9 +448,12 @@ let mut nt = NodeTree::load_bytes(Box::new(bytes), len); let data_tip = - docket.getattr(py, "tip_rev")?.extract::<Revision>(py)?; + docket.getattr(py, "tip_rev")?.extract::<i32>(py)?.into(); self.docket(py).borrow_mut().replace(docket.clone_ref(py)); let idx = self.cindex(py).borrow(); + let data_tip = idx.check_revision(data_tip).ok_or_else(|| { + nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip)) + })?; let current_tip = idx.len(); for r in (data_tip + 1)..current_tip as Revision { @@ -479,7 +482,7 @@ } } -fn rev_not_in_index(py: Python, rev: Revision) -> PyErr { +fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr { PyErr::new::<ValueError, _>( py, format!(