Mercurial > hg-stable
changeset 48496:1fb3615dfce2
rhg: centralize index header parsing
Centralize index header parsing, parse the generaldelta flag,
and leave breadcrumbs relating the Rust code to its Python counterpart.
Differential Revision: https://phab.mercurial-scm.org/D11881
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Tue, 07 Dec 2021 18:12:13 +0000 |
parents | e33d7c39db47 |
children | 96ea4db4741b |
files | rust/hg-core/src/revlog/index.rs rust/hg-core/src/revlog/revlog.rs |
diffstat | 2 files changed, 117 insertions(+), 56 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/index.rs Tue Dec 07 17:50:19 2021 +0000 +++ b/rust/hg-core/src/revlog/index.rs Tue Dec 07 18:12:13 2021 +0000 @@ -9,6 +9,75 @@ pub const INDEX_ENTRY_SIZE: usize = 64; +pub struct IndexHeader { + header_bytes: [u8; 4], +} + +#[derive(Copy, Clone)] +pub struct IndexHeaderFlags { + flags: u16, +} + +/// Corresponds to the high bits of `_format_flags` in python +impl IndexHeaderFlags { + /// Corresponds to FLAG_INLINE_DATA in python + pub fn is_inline(self) -> bool { + return self.flags & 1 != 0; + } + /// Corresponds to FLAG_GENERALDELTA in python + pub fn uses_generaldelta(self) -> bool { + return self.flags & 2 != 0; + } +} + +/// Corresponds to the INDEX_HEADER structure, +/// which is parsed as a `header` variable in `_loadindex` in `revlog.py` +impl IndexHeader { + fn format_flags(&self) -> IndexHeaderFlags { + // No "unknown flags" check here, unlike in python. Maybe there should + // be. + return IndexHeaderFlags { + flags: BigEndian::read_u16(&self.header_bytes[0..2]), + }; + } + + /// The only revlog version currently supported by rhg. + const REVLOGV1: u16 = 1; + + /// Corresponds to `_format_version` in Python. + fn format_version(&self) -> u16 { + return BigEndian::read_u16(&self.header_bytes[2..4]); + } + + const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader { + // We treat an empty file as a valid index with no entries. + // Here we make an arbitrary choice of what we assume the format of the + // index to be (V1, using generaldelta). + // This doesn't matter too much, since we're only doing read-only + // access. 
but the value corresponds to the `new_header` variable in + // `revlog.py`, `_loadindex` + header_bytes: [0, 3, 0, 1], + }; + + fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> { + if index_bytes.len() == 0 { + return Ok(IndexHeader::EMPTY_INDEX_HEADER); + } + if index_bytes.len() < 4 { + return Err(HgError::corrupted( + "corrupted revlog: can't read the index format header", + )); + } + return Ok(IndexHeader { + header_bytes: { + let bytes: [u8; 4] = + index_bytes[0..4].try_into().expect("impossible"); + bytes + }, + }); + } +} + /// A Revlog index pub struct Index { bytes: Box<dyn Deref<Target = [u8]> + Send>, @@ -23,7 +92,15 @@ pub fn new( bytes: Box<dyn Deref<Target = [u8]> + Send>, ) -> Result<Self, HgError> { - if is_inline(&bytes) { + let header = IndexHeader::parse(bytes.as_ref())?; + + if header.format_version() != IndexHeader::REVLOGV1 { + // A proper new version should have had a repo/store + // requirement. + return Err(HgError::corrupted("unsupported revlog version")); + } + + if header.format_flags().is_inline() { let mut offset: usize = 0; let mut offsets = Vec::new(); @@ -206,17 +283,6 @@ } } -/// Value of the inline flag. 
-pub fn is_inline(index_bytes: &[u8]) -> bool { - if index_bytes.len() < 4 { - return true; - } - match &index_bytes[0..=1] { - [0, 0] | [0, 2] => false, - _ => true, - } -} - #[cfg(test)] mod tests { use super::*; @@ -313,37 +379,60 @@ } } + pub fn is_inline(index_bytes: &[u8]) -> bool { + IndexHeader::parse(index_bytes) + .expect("too short") + .format_flags() + .is_inline() + } + + pub fn uses_generaldelta(index_bytes: &[u8]) -> bool { + IndexHeader::parse(index_bytes) + .expect("too short") + .format_flags() + .uses_generaldelta() + } + + pub fn get_version(index_bytes: &[u8]) -> u16 { + IndexHeader::parse(index_bytes) + .expect("too short") + .format_version() + } + #[test] - fn is_not_inline_when_no_inline_flag_test() { + fn flags_when_no_inline_flag_test() { let bytes = IndexEntryBuilder::new() .is_first(true) .with_general_delta(false) .with_inline(false) .build(); - assert_eq!(is_inline(&bytes), false) + assert_eq!(is_inline(&bytes), false); + assert_eq!(uses_generaldelta(&bytes), false); } #[test] - fn is_inline_when_inline_flag_test() { + fn flags_when_inline_flag_test() { let bytes = IndexEntryBuilder::new() .is_first(true) .with_general_delta(false) .with_inline(true) .build(); - assert_eq!(is_inline(&bytes), true) + assert_eq!(is_inline(&bytes), true); + assert_eq!(uses_generaldelta(&bytes), false); } #[test] - fn is_inline_when_inline_and_generaldelta_flags_test() { + fn flags_when_inline_and_generaldelta_flags_test() { let bytes = IndexEntryBuilder::new() .is_first(true) .with_general_delta(true) .with_inline(true) .build(); - assert_eq!(is_inline(&bytes), true) + assert_eq!(is_inline(&bytes), true); + assert_eq!(uses_generaldelta(&bytes), true); } #[test] @@ -400,6 +489,16 @@ assert_eq!(entry.base_revision(), 1) } + + #[test] + fn version_test() { + let bytes = IndexEntryBuilder::new() + .is_first(true) + .with_version(1) + .build(); + + assert_eq!(get_version(&bytes), 1) + } } #[cfg(test)]
--- a/rust/hg-core/src/revlog/revlog.rs Tue Dec 07 17:50:19 2021 +0000 +++ b/rust/hg-core/src/revlog/revlog.rs Tue Dec 07 18:12:13 2021 +0000 @@ -3,7 +3,6 @@ use std::ops::Deref; use std::path::Path; -use byteorder::{BigEndian, ByteOrder}; use flate2::read::ZlibDecoder; use micro_timer::timed; use sha1::{Digest, Sha1}; @@ -74,13 +73,6 @@ match repo.store_vfs().mmap_open_opt(&index_path)? { None => Index::new(Box::new(vec![])), Some(index_mmap) => { - let version = get_version(&index_mmap)?; - if version != 1 { - // A proper new version should have had a repo/store - // requirement. - return Err(HgError::corrupted("corrupted revlog")); - } - let index = Index::new(Box::new(index_mmap))?; Ok(index) } @@ -387,19 +379,6 @@ } } -/// Format version of the revlog. -pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> { - if index_bytes.len() == 0 { - return Ok(1); - }; - if index_bytes.len() < 4 { - return Err(HgError::corrupted( - "corrupted revlog: can't read the index format header", - )); - }; - Ok(BigEndian::read_u16(&index_bytes[2..=3])) -} - /// Calculate the hash of a revision given its data and its parents. fn hash( data: &[u8], @@ -418,20 +397,3 @@ hasher.update(data); *hasher.finalize().as_ref() } - -#[cfg(test)] -mod tests { - use super::*; - - use super::super::index::IndexEntryBuilder; - - #[test] - fn version_test() { - let bytes = IndexEntryBuilder::new() - .is_first(true) - .with_version(1) - .build(); - - assert_eq!(get_version(&bytes).map_err(|_err| ()), Ok(1)) - } -}