rust-node: binary Node ID and conversion utilities
author Georges Racinet <georges.racinet@octobus.net>
Wed, 22 Jan 2020 16:37:05 +0100
changeset 44143 7f86426fdd2c
parent 44142 63db6657d280
child 44144 bd0de73cf810
rust-node: binary Node ID and conversion utilities

The choice of type makes sure that a `Node` has the exact wanted size. We'll use a different type for prefixes.

Added dependency: hexadecimal conversion relies on the `hex` crate.

The fact that sooner or later Mercurial is going to need to change its hash sizes has been taken strongly into consideration:

- the hash length is a constant, but that is not directly exposed to callers. Changing the value of that constant is the only thing to do to change the hash length (even in unit tests)
- the code could be adapted to support several sizes of hashes, if that turned out to be useful. To that effect, only the size of a given `Node` is exposed in the public API.
- callers not involved in initial computation, I/O and FFI are able to operate without a priori assumptions on the hash size.

The traits `FromHex` and `ToHex` have not been directly implemented, so that the doc-comments explaining these restrictions would stay really visible in `cargo doc`.

Differential Revision: https://phab.mercurial-scm.org/D7788
rust/Cargo.lock
rust/hg-core/Cargo.toml
rust/hg-core/src/revlog.rs
rust/hg-core/src/revlog/node.rs
--- a/rust/Cargo.lock	Wed Jan 22 16:23:29 2020 +0100
+++ b/rust/Cargo.lock	Wed Jan 22 16:37:05 2020 +0100
@@ -124,10 +124,16 @@
 ]
 
 [[package]]
+name = "hex"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "hg-core"
 version = "0.1.0"
 dependencies = [
  "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -483,6 +489,7 @@
 "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3"
 "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
 "checksum getrandom 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571"
+"checksum hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "023b39be39e3a2da62a94feb433e91e8bcd37676fbc8bea371daf52b7a769a3e"
 "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 "checksum libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)" = "74dfca3d9957906e8d1e6a0b641dc9a59848e793f1da2165889fd4f62d10d79c"
 "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
--- a/rust/hg-core/Cargo.toml	Wed Jan 22 16:23:29 2020 +0100
+++ b/rust/hg-core/Cargo.toml	Wed Jan 22 16:37:05 2020 +0100
@@ -10,6 +10,7 @@
 
 [dependencies]
 byteorder = "1.3.1"
+hex = "0.4.0"
 lazy_static = "1.3.0"
 memchr = "2.2.0"
 rand = "0.6.5"
--- a/rust/hg-core/src/revlog.rs	Wed Jan 22 16:23:29 2020 +0100
+++ b/rust/hg-core/src/revlog.rs	Wed Jan 22 16:37:05 2020 +0100
@@ -5,7 +5,9 @@
 // GNU General Public License version 2 or any later version.
 //! Mercurial concepts for handling revision history
 
+pub mod node;
 pub mod nodemap;
+pub use node::{Node, NodeError};
 
 /// Mercurial revision numbers
 ///
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-core/src/revlog/node.rs	Wed Jan 22 16:37:05 2020 +0100
@@ -0,0 +1,191 @@
+// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Definitions and utilities for Revision nodes
+//!
+//! In Mercurial code base, it is customary to call "a node" the binary SHA
+//! of a revision.
+
+use hex::{self, FromHex, FromHexError};
+
+/// The length in bytes of a `Node`
+///
+/// This constant is meant to ease refactors of this module, and
+/// is private so that calling code does not assume that all nodes have
+/// the same size, should we support several formats concurrently in
+/// the future.
+const NODE_BYTES_LENGTH: usize = 20;
+
+/// The length in nybbles (hexadecimal digits) of a `Node`
+///
+/// see also `NODE_BYTES_LENGTH` about it being private.
+const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH;
+
+/// Private alias for readability and to ease future change
+type NodeData = [u8; NODE_BYTES_LENGTH];
+
+/// Binary revision SHA
+///
+/// ## Future changes of hash size
+///
+/// To accommodate future changes of hash size, Rust callers
+/// should use the conversion methods at the boundaries (FFI, actual
+/// computation of hashes and I/O) only, and only if required.
+///
+/// All other callers outside of unit tests should just handle `Node` values
+/// and never make any assumption on the actual length, using [`nybbles_len`]
+/// if they need a loop boundary.
+///
+/// All methods that create a `Node` either take a type that enforces
+/// the size or fail immediately at runtime with [`ExactLengthRequired`].
+///
+/// [`nybbles_len`]: #method.nybbles_len
+/// [`ExactLengthRequired`]: enum.NodeError.html#variant.ExactLengthRequired
+#[derive(Clone, Debug, PartialEq)]
+pub struct Node {
+    data: NodeData,
+}
+
+/// The node value for NULL_REVISION
+pub const NULL_NODE: Node = Node {
+    data: [0; NODE_BYTES_LENGTH],
+};
+
+impl From<NodeData> for Node {
+    fn from(data: NodeData) -> Node {
+        Node { data }
+    }
+}
+
+#[derive(Debug, PartialEq)]
+pub enum NodeError {
+    ExactLengthRequired(usize, String),
+    HexError(FromHexError, String),
+}
+
+/// Low level utility function, also for prefixes
+fn get_nybble(s: &[u8], i: usize) -> u8 {
+    if i % 2 == 0 {
+        s[i / 2] >> 4
+    } else {
+        s[i / 2] & 0x0f
+    }
+}
+
+impl Node {
+    /// Retrieve the `i`th half-byte of the binary data.
+    ///
+    /// This is also the `i`th hexadecimal digit in numeric form,
+    /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble).
+    pub fn get_nybble(&self, i: usize) -> u8 {
+        get_nybble(&self.data, i)
+    }
+
+    /// Length of the data, in nybbles
+    pub fn nybbles_len(&self) -> usize {
+        // public exposure as an instance method only, so that we can
+        // easily support several sizes of hashes if needed in the future.
+        NODE_NYBBLES_LENGTH
+    }
+
+    /// Convert from hexadecimal string representation
+    ///
+    /// Exact length is required.
+    ///
+    /// To be used in FFI and I/O only, in order to facilitate future
+    /// changes of hash format.
+    pub fn from_hex(hex: &str) -> Result<Node, NodeError> {
+        Ok(NodeData::from_hex(hex)
+            .map_err(|e| NodeError::from((e, hex)))?
+            .into())
+    }
+
+    /// Convert to hexadecimal string representation
+    ///
+    /// To be used in FFI and I/O only, in order to facilitate future
+    /// changes of hash format.
+    pub fn encode_hex(&self) -> String {
+        hex::encode(self.data)
+    }
+
+    /// Provide access to binary data
+    ///
+    /// This is needed by FFI layers, for instance to return expected
+    /// binary values to Python.
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.data
+    }
+}
+
+impl From<(FromHexError, &str)> for NodeError {
+    fn from(err_offender: (FromHexError, &str)) -> Self {
+        let (err, offender) = err_offender;
+        match err {
+            FromHexError::InvalidStringLength => {
+                NodeError::ExactLengthRequired(
+                    NODE_NYBBLES_LENGTH,
+                    offender.to_string(),
+                )
+            }
+            _ => NodeError::HexError(err, offender.to_string()),
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn sample_node() -> Node {
+        let mut data = [0; NODE_BYTES_LENGTH];
+        data.copy_from_slice(&[
+            0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba,
+            0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef,
+        ]);
+        data.into()
+    }
+
+    /// Pad a hexadecimal string to reach `NODE_NYBBLES_LENGTH`
+    ///
+    /// The padding is made with zeros
+    fn hex_pad_right(hex: &str) -> String {
+        let mut res = hex.to_string();
+        while res.len() < NODE_NYBBLES_LENGTH {
+            res.push('0');
+        }
+        res
+    }
+
+    fn sample_node_hex() -> String {
+        hex_pad_right("0123456789abcdeffedcba9876543210deadbeef")
+    }
+
+    #[test]
+    fn test_node_from_hex() {
+        assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node()));
+
+        let mut short = hex_pad_right("0123");
+        short.pop();
+        short.pop();
+        assert_eq!(
+            Node::from_hex(&short),
+            Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)),
+        );
+
+        let not_hex = hex_pad_right("012... oops");
+        assert_eq!(
+            Node::from_hex(&not_hex),
+            Err(NodeError::HexError(
+                FromHexError::InvalidHexCharacter { c: '.', index: 3 },
+                not_hex,
+            )),
+        );
+    }
+
+    #[test]
+    fn test_node_encode_hex() {
+        assert_eq!(sample_node().encode_hex(), sample_node_hex());
+    }
+}