# HG changeset patch # User Simon Sapin # Date 1613560016 -3600 # Node ID f88e8ae0aa8fcde5249c0f902f719ce12a36ebaf # Parent 5ce2aa7c2ad50a996a1b1657754f2ba89c799fa9 rust: Rewrite dirstate parsing using the `bytes-cast` crate Differential Revision: https://phab.mercurial-scm.org/D10005 diff -r 5ce2aa7c2ad5 -r f88e8ae0aa8f rust/Cargo.lock --- a/rust/Cargo.lock Mon Feb 15 20:13:09 2021 +0100 +++ b/rust/Cargo.lock Wed Feb 17 12:06:56 2021 +0100 @@ -310,7 +310,6 @@ "im-rc 15.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "micro-timer 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", diff -r 5ce2aa7c2ad5 -r f88e8ae0aa8f rust/hg-core/Cargo.toml --- a/rust/hg-core/Cargo.toml Mon Feb 15 20:13:09 2021 +0100 +++ b/rust/hg-core/Cargo.toml Wed Feb 17 12:06:56 2021 +0100 @@ -15,7 +15,6 @@ home = "0.5" im-rc = "15.0.*" lazy_static = "1.4.0" -memchr = "2.3.3" rand = "0.7.3" rand_pcg = "0.2.1" rand_distr = "0.2.2" diff -r 5ce2aa7c2ad5 -r f88e8ae0aa8f rust/hg-core/src/dirstate.rs --- a/rust/hg-core/src/dirstate.rs Mon Feb 15 20:13:09 2021 +0100 +++ b/rust/hg-core/src/dirstate.rs Wed Feb 17 12:06:56 2021 +0100 @@ -7,6 +7,7 @@ use crate::errors::HgError; use crate::{utils::hg_path::HgPathBuf, FastHashMap}; +use bytes_cast::{unaligned, BytesCast}; use std::collections::hash_map; use std::convert::TryFrom; @@ -17,7 +18,8 @@ pub mod parsers; pub mod status; -#[derive(Debug, PartialEq, Clone)] +#[derive(Debug, PartialEq, Clone, BytesCast)] +#[repr(C)] pub struct DirstateParents { pub p1: [u8; 20], pub p2: [u8; 20], @@ -34,6 +36,16 @@ pub size: i32, } +#[derive(BytesCast)] +#[repr(C)] +struct 
RawEntry { + state: u8, + mode: unaligned::I32Be, + size: unaligned::I32Be, + mtime: unaligned::I32Be, + length: unaligned::I32Be, +} + /// A `DirstateEntry` with a size of `-2` means that it was merged from the /// other parent. This allows revert to pick the right status back during a /// merge. diff -r 5ce2aa7c2ad5 -r f88e8ae0aa8f rust/hg-core/src/dirstate/dirstate_map.rs --- a/rust/hg-core/src/dirstate/dirstate_map.rs Mon Feb 15 20:13:09 2021 +0100 +++ b/rust/hg-core/src/dirstate/dirstate_map.rs Wed Feb 17 12:06:56 2021 +0100 @@ -386,10 +386,10 @@ } #[timed] - pub fn read( + pub fn read<'a>( &mut self, - file_contents: &[u8], - ) -> Result<Option<DirstateParents>, DirstateError> { + file_contents: &'a [u8], + ) -> Result<Option<&'a DirstateParents>, DirstateError> { if file_contents.is_empty() { return Ok(None); } diff -r 5ce2aa7c2ad5 -r f88e8ae0aa8f rust/hg-core/src/dirstate/parsers.rs --- a/rust/hg-core/src/dirstate/parsers.rs Mon Feb 15 20:13:09 2021 +0100 +++ b/rust/hg-core/src/dirstate/parsers.rs Wed Feb 17 12:06:56 2021 +0100 @@ -6,13 +6,13 @@ use crate::errors::HgError; use crate::utils::hg_path::HgPath; use crate::{ - dirstate::{CopyMap, EntryState, StateMap}, + dirstate::{CopyMap, EntryState, RawEntry, StateMap}, DirstateEntry, DirstateParents, }; -use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; +use byteorder::{BigEndian, WriteBytesExt}; +use bytes_cast::BytesCast; use micro_timer::timed; use std::convert::{TryFrom, TryInto}; -use std::io::Cursor; use std::time::Duration; /// Parents are stored in the dirstate as byte hashes. 
@@ -21,65 +21,45 @@ const MIN_ENTRY_SIZE: usize = 17; type ParseResult<'a> = ( - DirstateParents, + &'a DirstateParents, Vec<(&'a HgPath, DirstateEntry)>, Vec<(&'a HgPath, &'a HgPath)>, ); #[timed] -pub fn parse_dirstate(contents: &[u8]) -> Result<ParseResult, HgError> { - if contents.len() < PARENT_SIZE * 2 { - return Err(HgError::corrupted("Too little data for dirstate.")); - } - let mut copies = vec![]; - let mut entries = vec![]; +pub fn parse_dirstate(mut contents: &[u8]) -> Result<ParseResult, HgError> { + let mut copies = Vec::new(); + let mut entries = Vec::new(); - let mut curr_pos = PARENT_SIZE * 2; - let parents = DirstateParents { - p1: contents[..PARENT_SIZE].try_into().unwrap(), - p2: contents[PARENT_SIZE..curr_pos].try_into().unwrap(), - }; + let (parents, rest) = DirstateParents::from_bytes(contents) + .map_err(|_| HgError::corrupted("Too little data for dirstate."))?; + contents = rest; + while !contents.is_empty() { + let (raw_entry, rest) = RawEntry::from_bytes(contents) + .map_err(|_| HgError::corrupted("Overflow in dirstate."))?; - while curr_pos < contents.len() { - if curr_pos + MIN_ENTRY_SIZE > contents.len() { - return Err(HgError::corrupted("Overflow in dirstate.")); - } - let entry_bytes = &contents[curr_pos..]; + let entry = DirstateEntry { + state: EntryState::try_from(raw_entry.state)?, + mode: raw_entry.mode.get(), + mtime: raw_entry.mtime.get(), + size: raw_entry.size.get(), + }; + let (paths, rest) = + u8::slice_from_bytes(rest, raw_entry.length.get() as usize) + .map_err(|_| HgError::corrupted("Overflow in dirstate."))?; - let mut cursor = Cursor::new(entry_bytes); - // Unwraping errors from `byteorder` as we’ve already checked - // `MIN_ENTRY_SIZE` so the input should never be too short. 
- let state = EntryState::try_from(cursor.read_u8().unwrap())?; - let mode = cursor.read_i32::<BigEndian>().unwrap(); - let size = cursor.read_i32::<BigEndian>().unwrap(); - let mtime = cursor.read_i32::<BigEndian>().unwrap(); - let path_len = cursor.read_i32::<BigEndian>().unwrap() as usize; - - if path_len > contents.len() - curr_pos { - return Err(HgError::corrupted("Overflow in dirstate.")); + // `paths` is either a single path, or two paths separated by a NULL + // byte + let mut iter = paths.splitn(2, |&byte| byte == b'\0'); + let path = HgPath::new( + iter.next().expect("splitn always yields at least one item"), + ); + if let Some(copy_source) = iter.next() { + copies.push((path, HgPath::new(copy_source))); } - // Slice instead of allocating a Vec needed for `read_exact` - let path = &entry_bytes[MIN_ENTRY_SIZE..MIN_ENTRY_SIZE + (path_len)]; - - let (path, copy) = match memchr::memchr(0, path) { - None => (path, None), - Some(i) => (&path[..i], Some(&path[(i + 1)..])), - }; - - if let Some(copy_path) = copy { - copies.push((HgPath::new(path), HgPath::new(copy_path))); - }; - entries.push(( - HgPath::new(path), - DirstateEntry { - state, - mode, - size, - mtime, - }, - )); - curr_pos = curr_pos + MIN_ENTRY_SIZE + (path_len); + entries.push((path, entry)); + contents = rest; } Ok((parents, entries, copies)) } @@ -374,7 +354,7 @@ .collect(); assert_eq!( - (parents, state_map, copymap), + (&parents, state_map, copymap), (new_parents, new_state_map, new_copy_map) ) } @@ -452,7 +432,7 @@ .collect(); assert_eq!( - (parents, state_map, copymap), + (&parents, state_map, copymap), (new_parents, new_state_map, new_copy_map) ) } @@ -499,7 +479,7 @@ assert_eq!( ( - parents, + &parents, [( HgPathBuf::from_bytes(b"f1"), DirstateEntry {