changeset 45537:b0d6309ff50c

hg-core: check data integrity in `Revlog`

Check that the hash of the data reconstructed from deltas matches the hash
stored in the revision.

Differential Revision: https://phab.mercurial-scm.org/D9005
author Antoine Cezar <antoine.cezar@octobus.net>
date Wed, 02 Sep 2020 15:23:25 +0200
parents b1cea0dc9db0
children c2317b7624fd
files rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/dirstate/dirstate_map.rs rust/hg-core/src/revlog/index.rs rust/hg-core/src/revlog/node.rs rust/hg-core/src/revlog/revlog.rs
diffstat 6 files changed, 171 insertions(+), 10 deletions(-) [+]
line wrap: on
line diff
--- a/rust/Cargo.lock	Wed Sep 23 12:26:16 2020 +0200
+++ b/rust/Cargo.lock	Wed Sep 02 15:23:25 2020 +0200
@@ -202,6 +202,16 @@
 ]
 
 [[package]]
+name = "fuchsia-cprng"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "gcc"
+version = "0.3.55"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "getrandom"
 version = "0.1.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -249,6 +259,7 @@
  "rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "rayon 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "regex 1.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rust-crypto 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)",
  "same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
  "tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "twox-hash 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -461,6 +472,27 @@
 
 [[package]]
 name = "rand"
+version = "0.3.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.77 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "rand"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.77 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "rand"
 version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
@@ -482,6 +514,19 @@
 
 [[package]]
 name = "rand_core"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
+name = "rand_core"
 version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
@@ -536,6 +581,14 @@
 ]
 
 [[package]]
+name = "rdrand"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
 name = "redox_syscall"
 version = "0.1.57"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -576,6 +629,23 @@
 ]
 
 [[package]]
+name = "rust-crypto"
+version = "0.2.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "gcc 0.3.55 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libc 0.2.77 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rand 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)",
+ "time 0.1.44 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "rustc-serialize"
+version = "0.3.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
 name = "same-file"
 version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -752,6 +822,8 @@
 "checksum either 1.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
 "checksum env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36"
 "checksum flate2 1.0.17 (registry+https://github.com/rust-lang/crates.io-index)" = "766d0e77a2c1502169d4a93ff3b8c15a71fd946cd0126309752104e5f3c46d94"
+"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
+"checksum gcc 0.3.55 (registry+https://github.com/rust-lang/crates.io-index)" = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2"
 "checksum getrandom 0.1.15 (registry+https://github.com/rust-lang/crates.io-index)" = "fc587bc0ec293155d5bfa6b9891ec18a1e330c234f896ea47fbada4cadbe47e6"
 "checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
 "checksum hermit-abi 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "4c30f6d0bc6b00693347368a67d41b58f2fb851215ff1da49e90fe2c5c667151"
@@ -781,18 +853,25 @@
 "checksum python3-sys 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90af11779515a1e530af60782d273b59ac79d33b0e253c071a728563957c76d4"
 "checksum quick-error 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
 "checksum quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
+"checksum rand 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)" = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c"
+"checksum rand 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293"
 "checksum rand 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03"
 "checksum rand_chacha 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402"
+"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b"
+"checksum rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc"
 "checksum rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19"
 "checksum rand_distr 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "96977acbdd3a6576fb1d27391900035bf3863d4a16422973a409b488cf29ffb2"
 "checksum rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c"
 "checksum rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
 "checksum rayon 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "cfd016f0c045ad38b5251be2c9c0ab806917f82da4d36b2a327e5166adad9270"
 "checksum rayon-core 1.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e8c4fec834fb6e6d2dd5eece3c7b432a52f0ba887cf40e595190c4107edc08bf"
+"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
 "checksum redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)" = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
 "checksum regex 1.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "9c3780fcf44b193bc4d09f36d2a3c87b251da4a046c87795a0d35f4f927ad8e6"
 "checksum regex-syntax 0.6.18 (registry+https://github.com/rust-lang/crates.io-index)" = "26412eb97c6b088a6997e05f69403a802a92d520de2f8e63c2b65f9e0f47c4e8"
 "checksum remove_dir_all 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
+"checksum rust-crypto 0.2.36 (registry+https://github.com/rust-lang/crates.io-index)" = "f76d05d3993fd5f4af9434e8e436db163a12a9d40e1a58a726f27a01dfd12a2a"
+"checksum rustc-serialize 0.3.24 (registry+https://github.com/rust-lang/crates.io-index)" = "dcf128d1287d2ea9d80910b5f1120d0b8eede3fbf1abe91c40d39ea7d51e6fda"
 "checksum same-file 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
 "checksum scopeguard 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
 "checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
--- a/rust/hg-core/Cargo.toml	Wed Sep 23 12:26:16 2020 +0200
+++ b/rust/hg-core/Cargo.toml	Wed Sep 02 15:23:25 2020 +0200
@@ -25,6 +25,7 @@
 log = "0.4.8"
 memmap = "0.7.0"
 zstd = "0.5.3"
+rust-crypto = "0.2.36"
 
 # We don't use the `miniz-oxide` backend because its minimum Rust version is
 # `1.36`. However, this PR (https://github.com/Frommi/miniz_oxide/pull/84/files)
--- a/rust/hg-core/src/dirstate/dirstate_map.rs	Wed Sep 23 12:26:16 2020 +0200
+++ b/rust/hg-core/src/dirstate/dirstate_map.rs	Wed Sep 02 15:23:25 2020 +0200
@@ -5,6 +5,7 @@
 // This software may be used and distributed according to the terms of the
 // GNU General Public License version 2 or any later version.
 
+use crate::revlog::node::NULL_NODE_ID;
 use crate::{
     dirstate::{parsers::PARENT_SIZE, EntryState, SIZE_FROM_OTHER_PARENT},
     pack_dirstate, parse_dirstate,
@@ -24,7 +25,6 @@
 
 pub type FileFoldMap = FastHashMap<HgPathBuf, HgPathBuf>;
 
-const NULL_ID: [u8; 20] = [0; 20];
 const MTIME_UNSET: i32 = -1;
 
 #[derive(Default)]
@@ -72,8 +72,8 @@
         self.non_normal_set = None;
         self.other_parent_set = None;
         self.set_parents(&DirstateParents {
-            p1: NULL_ID,
-            p2: NULL_ID,
+            p1: NULL_NODE_ID,
+            p2: NULL_NODE_ID,
         })
     }
 
@@ -340,8 +340,8 @@
             };
         } else if file_contents.is_empty() {
             parents = DirstateParents {
-                p1: NULL_ID,
-                p2: NULL_ID,
+                p1: NULL_NODE_ID,
+                p2: NULL_NODE_ID,
             };
         } else {
             return Err(DirstateError::Parse(DirstateParseError::Damaged));
--- a/rust/hg-core/src/revlog/index.rs	Wed Sep 23 12:26:16 2020 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Wed Sep 02 15:23:25 2020 +0200
@@ -1,5 +1,6 @@
+use byteorder::{BigEndian, ByteOrder};
+
 use crate::revlog::{Revision, NULL_REVISION};
-use byteorder::{BigEndian, ByteOrder};
 
 pub const INDEX_ENTRY_SIZE: usize = 64;
 
@@ -141,6 +142,22 @@
 
         BigEndian::read_i32(&self.bytes[16..])
     }
+
+    pub fn p1(&self) -> Revision {
+        BigEndian::read_i32(&self.bytes[24..])
+    }
+
+    pub fn p2(&self) -> Revision {
+        BigEndian::read_i32(&self.bytes[28..])
+    }
+
+    /// Return the hash of the revision's full text.
+    ///
+    /// Currently, SHA-1 is used and only the first 20 bytes of this field
+    /// are used.
+    pub fn hash(&self) -> &[u8] {
+        &self.bytes[32..52]
+    }
 }
 
 #[cfg(test)]
--- a/rust/hg-core/src/revlog/node.rs	Wed Sep 23 12:26:16 2020 +0200
+++ b/rust/hg-core/src/revlog/node.rs	Wed Sep 02 15:23:25 2020 +0200
@@ -16,7 +16,12 @@
 /// are private so that calling code does not expect all nodes have
 /// the same size, should we support several formats concurrently in
 /// the future.
-const NODE_BYTES_LENGTH: usize = 20;
+pub const NODE_BYTES_LENGTH: usize = 20;
+
+/// Id of the null node.
+///
+/// Used to indicate the absence of a node.
+pub const NULL_NODE_ID: [u8; NODE_BYTES_LENGTH] = [0u8; NODE_BYTES_LENGTH];
 
 /// The length in bytes of a `Node`
 ///
--- a/rust/hg-core/src/revlog/revlog.rs	Wed Sep 23 12:26:16 2020 +0200
+++ b/rust/hg-core/src/revlog/revlog.rs	Wed Sep 02 15:23:25 2020 +0200
@@ -5,12 +5,15 @@
 use std::path::Path;
 
 use byteorder::{BigEndian, ByteOrder};
+use crypto::digest::Digest;
+use crypto::sha1::Sha1;
 use flate2::read::ZlibDecoder;
 use memmap::{Mmap, MmapOptions};
 use micro_timer::timed;
 use zstd;
 
 use super::index::Index;
+use super::node::{NODE_BYTES_LENGTH, NULL_NODE_ID};
 use super::patch;
 use crate::revlog::Revision;
 
@@ -93,13 +96,52 @@
                 .map_err(|_| RevlogError::Corrupted)?;
         }
 
-        if delta_chain.is_empty() {
-            Ok(entry.data()?.into())
+        // TODO do not look twice in the index
+        let index = self.index();
+        let index_entry =
+            index.get_entry(rev).ok_or(RevlogError::InvalidRevision)?;
+
+        let data: Vec<u8> = if delta_chain.is_empty() {
+            entry.data()?.into()
+        } else {
+            Revlog::build_data_from_deltas(entry, &delta_chain)?
+        };
+
+        if self.check_hash(
+            index_entry.p1(),
+            index_entry.p2(),
+            index_entry.hash(),
+            &data,
+        ) {
+            Ok(data)
         } else {
-            Revlog::build_data_from_deltas(entry, &delta_chain)
+            Err(RevlogError::Corrupted)
         }
     }
 
+    /// Check the hash of some given data against the recorded hash.
+    pub fn check_hash(
+        &self,
+        p1: Revision,
+        p2: Revision,
+        expected: &[u8],
+        data: &[u8],
+    ) -> bool {
+        let index = self.index();
+        let e1 = index.get_entry(p1);
+        let h1 = match e1 {
+            Some(ref entry) => entry.hash(),
+            None => &NULL_NODE_ID,
+        };
+        let e2 = index.get_entry(p2);
+        let h2 = match e2 {
+            Some(ref entry) => entry.hash(),
+            None => &NULL_NODE_ID,
+        };
+
+        hash(data, &h1, &h2).as_slice() == expected
+    }
+
     /// Build the full data of a revision out its snapshot
     /// and its deltas.
     #[timed]
@@ -234,6 +276,23 @@
     BigEndian::read_u16(&index_bytes[2..=3])
 }
 
+/// Calculate the hash of a revision given its data and its parents.
+fn hash(data: &[u8], p1_hash: &[u8], p2_hash: &[u8]) -> Vec<u8> {
+    let mut hasher = Sha1::new();
+    let (a, b) = (p1_hash, p2_hash);
+    if a > b {
+        hasher.input(b);
+        hasher.input(a);
+    } else {
+        hasher.input(a);
+        hasher.input(b);
+    }
+    hasher.input(data);
+    let mut hash = vec![0; NODE_BYTES_LENGTH];
+    hasher.result(&mut hash);
+    hash
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;