Mercurial > hg
changeset 44143:7f86426fdd2c
rust-node: binary Node ID and conversion utilities
The choice of type makes sure that a `Node` has the exact
wanted size. We'll use a different type for prefixes.
Added dependency: hexadecimal conversion relies on the
`hex` crate.
The fact that sooner or later Mercurial is going to need
to change its hash sizes has been taken strongly into
consideration:
- the hash length is a constant, but that is not directly
exposed to callers. Changing the value of that constant
is the only thing to do to change the hash length (even
in unit tests)
- the code could be adapted to support several sizes of hashes,
if that turned out to be useful. To that effect, only the
size of a given `Node` is exposed in the public API.
- callers not involved in initial computation, I/O and FFI
are able to operate without a priori assumptions about the hash
size. The traits `FromHex` and `ToHex` have not been directly
implemented, so that the doc-comments explaining these
restrictions stay clearly visible in `cargo doc`.
Differential Revision: https://phab.mercurial-scm.org/D7788
author | Georges Racinet <georges.racinet@octobus.net> |
---|---|
date | Wed, 22 Jan 2020 16:37:05 +0100 |
parents | 63db6657d280 |
children | bd0de73cf810 |
files | rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/revlog.rs rust/hg-core/src/revlog/node.rs |
diffstat | 4 files changed, 201 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/Cargo.lock Wed Jan 22 16:23:29 2020 +0100 +++ b/rust/Cargo.lock Wed Jan 22 16:37:05 2020 +0100 @@ -124,10 +124,16 @@ ] [[package]] +name = "hex" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] name = "hg-core" version = "0.1.0" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", @@ -483,6 +489,7 @@ "checksum either 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "bb1f6b1ce1c140482ea30ddd3335fc0024ac7ee112895426e0a629a6c20adfe3" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum getrandom 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "473a1265acc8ff1e808cd0a1af8cee3c2ee5200916058a2ca113c29f2d903571" +"checksum hex 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "023b39be39e3a2da62a94feb433e91e8bcd37676fbc8bea371daf52b7a769a3e" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum libc 0.2.64 (registry+https://github.com/rust-lang/crates.io-index)" = "74dfca3d9957906e8d1e6a0b641dc9a59848e793f1da2165889fd4f62d10d79c" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
--- a/rust/hg-core/Cargo.toml Wed Jan 22 16:23:29 2020 +0100 +++ b/rust/hg-core/Cargo.toml Wed Jan 22 16:37:05 2020 +0100 @@ -10,6 +10,7 @@ [dependencies] byteorder = "1.3.1" +hex = "0.4.0" lazy_static = "1.3.0" memchr = "2.2.0" rand = "0.6.5"
--- a/rust/hg-core/src/revlog.rs Wed Jan 22 16:23:29 2020 +0100 +++ b/rust/hg-core/src/revlog.rs Wed Jan 22 16:37:05 2020 +0100 @@ -5,7 +5,9 @@ // GNU General Public License version 2 or any later version. //! Mercurial concepts for handling revision history +pub mod node; pub mod nodemap; +pub use node::{Node, NodeError}; /// Mercurial revision numbers ///
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/revlog/node.rs Wed Jan 22 16:37:05 2020 +0100 @@ -0,0 +1,191 @@ +// Copyright 2019-2020 Georges Racinet <georges.racinet@octobus.net> +// +// This software may be used and distributed according to the terms of the +// GNU General Public License version 2 or any later version. + +//! Definitions and utilities for Revision nodes +//! +//! In Mercurial code base, it is customary to call "a node" the binary SHA +//! of a revision. + +use hex::{self, FromHex, FromHexError}; + +/// The length in bytes of a `Node` +/// +/// This constant is meant to ease refactors of this module, and +/// are private so that calling code does not expect all nodes have +/// the same size, should we support several formats concurrently in +/// the future. +const NODE_BYTES_LENGTH: usize = 20; + +/// The length in bytes of a `Node` +/// +/// see also `NODES_BYTES_LENGTH` about it being private. +const NODE_NYBBLES_LENGTH: usize = 2 * NODE_BYTES_LENGTH; + +/// Private alias for readability and to ease future change +type NodeData = [u8; NODE_BYTES_LENGTH]; + +/// Binary revision SHA +/// +/// ## Future changes of hash size +/// +/// To accomodate future changes of hash size, Rust callers +/// should use the conversion methods at the boundaries (FFI, actual +/// computation of hashes and I/O) only, and only if required. +/// +/// All other callers outside of unit tests should just handle `Node` values +/// and never make any assumption on the actual length, using [`nybbles_len`] +/// if they need a loop boundary. +/// +/// All methods that create a `Node` either take a type that enforces +/// the size or fail immediately at runtime with [`ExactLengthRequired`]. 
+/// +/// [`nybbles_len`]: #method.nybbles_len +/// [`ExactLengthRequired`]: struct.NodeError#variant.ExactLengthRequired +#[derive(Clone, Debug, PartialEq)] +pub struct Node { + data: NodeData, +} + +/// The node value for NULL_REVISION +pub const NULL_NODE: Node = Node { + data: [0; NODE_BYTES_LENGTH], +}; + +impl From<NodeData> for Node { + fn from(data: NodeData) -> Node { + Node { data } + } +} + +#[derive(Debug, PartialEq)] +pub enum NodeError { + ExactLengthRequired(usize, String), + HexError(FromHexError, String), +} + +/// Low level utility function, also for prefixes +fn get_nybble(s: &[u8], i: usize) -> u8 { + if i % 2 == 0 { + s[i / 2] >> 4 + } else { + s[i / 2] & 0x0f + } +} + +impl Node { + /// Retrieve the `i`th half-byte of the binary data. + /// + /// This is also the `i`th hexadecimal digit in numeric form, + /// also called a [nybble](https://en.wikipedia.org/wiki/Nibble). + pub fn get_nybble(&self, i: usize) -> u8 { + get_nybble(&self.data, i) + } + + /// Length of the data, in nybbles + pub fn nybbles_len(&self) -> usize { + // public exposure as an instance method only, so that we can + // easily support several sizes of hashes if needed in the future. + NODE_NYBBLES_LENGTH + } + + /// Convert from hexadecimal string representation + /// + /// Exact length is required. + /// + /// To be used in FFI and I/O only, in order to facilitate future + /// changes of hash format. + pub fn from_hex(hex: &str) -> Result<Node, NodeError> { + Ok(NodeData::from_hex(hex) + .map_err(|e| NodeError::from((e, hex)))? + .into()) + } + + /// Convert to hexadecimal string representation + /// + /// To be used in FFI and I/O only, in order to facilitate future + /// changes of hash format. + pub fn encode_hex(&self) -> String { + hex::encode(self.data) + } + + /// Provide access to binary data + /// + /// This is needed by FFI layers, for instance to return expected + /// binary values to Python. 
+ pub fn as_bytes(&self) -> &[u8] { + &self.data + } +} + +impl From<(FromHexError, &str)> for NodeError { + fn from(err_offender: (FromHexError, &str)) -> Self { + let (err, offender) = err_offender; + match err { + FromHexError::InvalidStringLength => { + NodeError::ExactLengthRequired( + NODE_NYBBLES_LENGTH, + offender.to_string(), + ) + } + _ => NodeError::HexError(err, offender.to_string()), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_node() -> Node { + let mut data = [0; NODE_BYTES_LENGTH]; + data.copy_from_slice(&[ + 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, + 0x98, 0x76, 0x54, 0x32, 0x10, 0xde, 0xad, 0xbe, 0xef, + ]); + data.into() + } + + /// Pad an hexadecimal string to reach `NODE_NYBBLES_LENGTH` + /// + /// The padding is made with zeros + fn hex_pad_right(hex: &str) -> String { + let mut res = hex.to_string(); + while res.len() < NODE_NYBBLES_LENGTH { + res.push('0'); + } + res + } + + fn sample_node_hex() -> String { + hex_pad_right("0123456789abcdeffedcba9876543210deadbeef") + } + + #[test] + fn test_node_from_hex() { + assert_eq!(Node::from_hex(&sample_node_hex()), Ok(sample_node())); + + let mut short = hex_pad_right("0123"); + short.pop(); + short.pop(); + assert_eq!( + Node::from_hex(&short), + Err(NodeError::ExactLengthRequired(NODE_NYBBLES_LENGTH, short)), + ); + + let not_hex = hex_pad_right("012... oops"); + assert_eq!( + Node::from_hex(¬_hex), + Err(NodeError::HexError( + FromHexError::InvalidHexCharacter { c: '.', index: 3 }, + not_hex, + )), + ); + } + + #[test] + fn test_node_encode_hex() { + assert_eq!(sample_node().encode_hex(), sample_node_hex()); + } +}