changeset 46037:88e741bf2d93

rust: use NodePrefix::from_hex instead of hex::decode directly This adds support for prefixes with an odd number of hex digits. Differential Revision: https://phab.mercurial-scm.org/D9490
author Simon Sapin <simon-commits@exyr.org>
date Wed, 02 Dec 2020 15:00:49 +0100
parents 8d6164098782
children 0c02c2a0badb
files rust/hg-core/src/operations/cat.rs rust/hg-core/src/operations/debugdata.rs rust/hg-core/src/operations/list_tracked_files.rs rust/hg-core/src/revlog/changelog.rs rust/hg-core/src/revlog/index.rs rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/revlog/node.rs rust/hg-core/src/revlog/revlog.rs tests/test-rhg.t
diffstat 9 files changed, 74 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/operations/cat.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/operations/cat.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -13,6 +13,8 @@
 use crate::revlog::path_encode::path_encode;
 use crate::revlog::revlog::Revlog;
 use crate::revlog::revlog::RevlogError;
+use crate::revlog::Node;
+use crate::revlog::NodePrefix;
 use crate::revlog::Revision;
 use crate::utils::files::get_path_from_bytes;
 use crate::utils::hg_path::{HgPath, HgPathBuf};
@@ -108,15 +110,16 @@
         let changelog_entry = match self.rev.parse::<Revision>() {
             Ok(rev) => self.changelog.get_rev(rev)?,
             _ => {
-                let changelog_node = hex::decode(&self.rev)
+                let changelog_node = NodePrefix::from_hex(&self.rev)
                     .map_err(|_| CatRevErrorKind::InvalidRevision)?;
-                self.changelog.get_node(&changelog_node)?
+                self.changelog.get_node(changelog_node.borrow())?
             }
         };
-        let manifest_node = hex::decode(&changelog_entry.manifest_node()?)
+        let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
             .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
 
-        self.manifest_entry = Some(self.manifest.get_node(&manifest_node)?);
+        self.manifest_entry =
+            Some(self.manifest.get_node((&manifest_node).into())?);
         if let Some(ref manifest_entry) = self.manifest_entry {
             let mut bytes = vec![];
 
@@ -132,9 +135,10 @@
 
                         let file_log =
                             Revlog::open(&index_path, Some(&data_path))?;
-                        let file_node = hex::decode(&node_bytes)
+                        let file_node = Node::from_hex(node_bytes)
                             .map_err(|_| CatRevErrorKind::CorruptedRevlog)?;
-                        let file_rev = file_log.get_node_rev(&file_node)?;
+                        let file_rev =
+                            file_log.get_node_rev((&file_node).into())?;
                         let data = file_log.get_rev_data(file_rev)?;
                         if data.starts_with(&METADATA_DELIMITER) {
                             let end_delimiter_position = data
--- a/rust/hg-core/src/operations/debugdata.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/operations/debugdata.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -7,6 +7,7 @@
 
 use super::find_root;
 use crate::revlog::revlog::{Revlog, RevlogError};
+use crate::revlog::NodePrefix;
 use crate::revlog::Revision;
 
 /// Kind of data to debug
@@ -107,9 +108,9 @@
         let data = match self.rev.parse::<Revision>() {
             Ok(rev) => revlog.get_rev_data(rev)?,
             _ => {
-                let node = hex::decode(&self.rev)
+                let node = NodePrefix::from_hex(&self.rev)
                     .map_err(|_| DebugDataErrorKind::InvalidRevision)?;
-                let rev = revlog.get_node_rev(&node)?;
+                let rev = revlog.get_node_rev(node.borrow())?;
                 revlog.get_rev_data(rev)?
             }
         };
--- a/rust/hg-core/src/operations/list_tracked_files.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/operations/list_tracked_files.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -8,6 +8,7 @@
 use crate::dirstate::parsers::parse_dirstate;
 use crate::revlog::changelog::Changelog;
 use crate::revlog::manifest::{Manifest, ManifestEntry};
+use crate::revlog::node::{Node, NodePrefix};
 use crate::revlog::revlog::RevlogError;
 use crate::revlog::Revision;
 use crate::utils::hg_path::HgPath;
@@ -171,15 +172,16 @@
         let changelog_entry = match self.rev.parse::<Revision>() {
             Ok(rev) => self.changelog.get_rev(rev)?,
             _ => {
-                let changelog_node = hex::decode(&self.rev)
+                let changelog_node = NodePrefix::from_hex(&self.rev)
                     .or(Err(ListRevTrackedFilesErrorKind::InvalidRevision))?;
-                self.changelog.get_node(&changelog_node)?
+                self.changelog.get_node(changelog_node.borrow())?
             }
         };
-        let manifest_node = hex::decode(&changelog_entry.manifest_node()?)
+        let manifest_node = Node::from_hex(&changelog_entry.manifest_node()?)
             .or(Err(ListRevTrackedFilesErrorKind::CorruptedRevlog))?;
 
-        self.manifest_entry = Some(self.manifest.get_node(&manifest_node)?);
+        self.manifest_entry =
+            Some(self.manifest.get_node((&manifest_node).into())?);
 
         if let Some(ref manifest_entry) = self.manifest_entry {
             Ok(manifest_entry.files())
--- a/rust/hg-core/src/revlog/changelog.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/revlog/changelog.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -1,4 +1,5 @@
 use crate::revlog::revlog::{Revlog, RevlogError};
+use crate::revlog::NodePrefixRef;
 use crate::revlog::Revision;
 use std::path::PathBuf;
 
@@ -19,7 +20,7 @@
     /// Return the `ChangelogEntry` a given node id.
     pub fn get_node(
         &self,
-        node: &[u8],
+        node: NodePrefixRef,
     ) -> Result<ChangelogEntry, RevlogError> {
         let rev = self.revlog.get_node_rev(node)?;
         self.get_rev(rev)
--- a/rust/hg-core/src/revlog/index.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/revlog/index.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -1,7 +1,9 @@
+use std::convert::TryInto;
 use std::ops::Deref;
 
 use byteorder::{BigEndian, ByteOrder};
 
+use crate::revlog::node::Node;
 use crate::revlog::revlog::RevlogError;
 use crate::revlog::{Revision, NULL_REVISION};
 
@@ -188,8 +190,8 @@
     ///
     /// Currently, SHA-1 is used and only the first 20 bytes of this field
     /// are used.
-    pub fn hash(&self) -> &[u8] {
-        &self.bytes[32..52]
+    pub fn hash(&self) -> &Node {
+        (&self.bytes[32..52]).try_into().unwrap()
     }
 }
 
--- a/rust/hg-core/src/revlog/manifest.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/revlog/manifest.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -1,4 +1,5 @@
 use crate::revlog::revlog::{Revlog, RevlogError};
+use crate::revlog::NodePrefixRef;
 use crate::revlog::Revision;
 use crate::utils::hg_path::HgPath;
 use std::path::PathBuf;
@@ -18,7 +19,10 @@
     }
 
     /// Return the `ManifestEntry` of a given node id.
-    pub fn get_node(&self, node: &[u8]) -> Result<ManifestEntry, RevlogError> {
+    pub fn get_node(
+        &self,
+        node: NodePrefixRef,
+    ) -> Result<ManifestEntry, RevlogError> {
         let rev = self.revlog.get_node_rev(node)?;
         self.get_rev(rev)
     }
--- a/rust/hg-core/src/revlog/node.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/revlog/node.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -9,6 +9,7 @@
 //! of a revision.
 
 use hex::{self, FromHex, FromHexError};
+use std::convert::{TryFrom, TryInto};
 
 /// The length in bytes of a `Node`
 ///
@@ -65,6 +66,19 @@
     }
 }
 
+/// Return an error if the slice has an unexpected length
+impl<'a> TryFrom<&'a [u8]> for &'a Node {
+    type Error = std::array::TryFromSliceError;
+
+    #[inline]
+    fn try_from(bytes: &'a [u8]) -> Result<&'a Node, Self::Error> {
+        let data = bytes.try_into()?;
+        // Safety: `#[repr(transparent)]` makes it ok to "wrap" the target
+        // of a reference to the type of the single field.
+        Ok(unsafe { std::mem::transmute::<&NodeData, &Node>(data) })
+    }
+}
+
 #[derive(Debug, PartialEq)]
 pub enum NodeError {
     ExactLengthRequired(usize, String),
@@ -103,8 +117,8 @@
     ///
     /// To be used in FFI and I/O only, in order to facilitate future
     /// changes of hash format.
-    pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
-        Ok(NodeData::from_hex(hex)
+    pub fn from_hex(hex: impl AsRef<[u8]>) -> Result<Node, NodeError> {
+        Ok(NodeData::from_hex(hex.as_ref())
             .map_err(|e| NodeError::from((e, hex)))?
             .into())
     }
@@ -126,17 +140,15 @@
     }
 }
 
-impl<T: AsRef<str>> From<(FromHexError, T)> for NodeError {
+impl<T: AsRef<[u8]>> From<(FromHexError, T)> for NodeError {
     fn from(err_offender: (FromHexError, T)) -> Self {
         let (err, offender) = err_offender;
+        let offender = String::from_utf8_lossy(offender.as_ref()).into_owned();
         match err {
             FromHexError::InvalidStringLength => {
-                NodeError::ExactLengthRequired(
-                    NODE_NYBBLES_LENGTH,
-                    offender.as_ref().to_owned(),
-                )
+                NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, offender)
             }
-            _ => NodeError::HexError(err, offender.as_ref().to_owned()),
+            _ => NodeError::HexError(err, offender),
         }
     }
 }
@@ -171,8 +183,8 @@
 
         let is_odd = len % 2 == 1;
         let even_part = if is_odd { &hex[..len - 1] } else { hex };
-        let mut buf: Vec<u8> = Vec::from_hex(&even_part)
-            .map_err(|e| (e, String::from_utf8_lossy(hex)))?;
+        let mut buf: Vec<u8> =
+            Vec::from_hex(&even_part).map_err(|e| (e, hex))?;
 
         if is_odd {
             let latest_char = char::from(hex[len - 1]);
@@ -182,7 +194,7 @@
                         c: latest_char,
                         index: len - 1,
                     },
-                    String::from_utf8_lossy(hex),
+                    hex,
                 )
             })? as u8;
             buf.push(latest_nybble << 4);
@@ -278,6 +290,12 @@
     }
 }
 
+impl PartialEq<Node> for NodePrefixRef<'_> {
+    fn eq(&self, other: &Node) -> bool {
+        !self.is_odd && self.buf == other.data
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -292,7 +310,7 @@
     }
 
     /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH`
-    ///
+    ///check_hash
     /// The padding is made with zeros
     pub fn hex_pad_right(hex: &str) -> String {
         let mut res = hex.to_string();
--- a/rust/hg-core/src/revlog/revlog.rs	Mon Nov 30 19:34:49 2020 +0100
+++ b/rust/hg-core/src/revlog/revlog.rs	Wed Dec 02 15:00:49 2020 +0100
@@ -13,7 +13,7 @@
 use zstd;
 
 use super::index::Index;
-use super::node::{NODE_BYTES_LENGTH, NULL_NODE_ID};
+use super::node::{NodePrefixRef, NODE_BYTES_LENGTH, NULL_NODE};
 use super::patch;
 use crate::revlog::Revision;
 
@@ -92,17 +92,20 @@
 
     /// Return the full data associated to a node.
     #[timed]
-    pub fn get_node_rev(&self, node: &[u8]) -> Result<Revision, RevlogError> {
+    pub fn get_node_rev(
+        &self,
+        node: NodePrefixRef,
+    ) -> Result<Revision, RevlogError> {
         // This is brute force. But it is fast enough for now.
         // Optimization will come later.
         let mut found_by_prefix = None;
         for rev in (0..self.len() as Revision).rev() {
             let index_entry =
                 self.index.get_entry(rev).ok_or(RevlogError::Corrupted)?;
-            if index_entry.hash() == node {
+            if node == *index_entry.hash() {
                 return Ok(rev);
             }
-            if index_entry.hash().starts_with(node) {
+            if node.is_prefix_of(index_entry.hash()) {
                 if found_by_prefix.is_some() {
                     return Err(RevlogError::AmbiguousPrefix);
                 }
@@ -143,7 +146,7 @@
         if self.check_hash(
             index_entry.p1(),
             index_entry.p2(),
-            index_entry.hash(),
+            index_entry.hash().as_bytes(),
             &data,
         ) {
             Ok(data)
@@ -163,15 +166,15 @@
         let e1 = self.index.get_entry(p1);
         let h1 = match e1 {
             Some(ref entry) => entry.hash(),
-            None => &NULL_NODE_ID,
+            None => &NULL_NODE,
         };
         let e2 = self.index.get_entry(p2);
         let h2 = match e2 {
             Some(ref entry) => entry.hash(),
-            None => &NULL_NODE_ID,
+            None => &NULL_NODE,
         };
 
-        hash(data, &h1, &h2).as_slice() == expected
+        hash(data, h1.as_bytes(), h2.as_bytes()).as_slice() == expected
     }
 
     /// Build the full data of a revision out its snapshot
--- a/tests/test-rhg.t	Mon Nov 30 19:34:49 2020 +0100
+++ b/tests/test-rhg.t	Wed Dec 02 15:00:49 2020 +0100
@@ -115,8 +115,10 @@
   $ rhg cat -r cf8b83 file-2
   2
   $ rhg cat -r c file-2
-  abort: invalid revision identifier c
+  abort: ambiguous revision identifier c
   [255]
+  $ rhg cat -r d file-2
+  2
 
 Cat files
   $ cd $TESTTMP