rhg: centralize index header parsing
Centralize index header parsing, parse the generaldelta flag,
and leave breadcrumbs to relate the code to python.
Differential Revision: https://phab.mercurial-scm.org/D11881
--- a/rust/hg-core/src/revlog/index.rs Tue Dec 07 17:50:19 2021 +0000
+++ b/rust/hg-core/src/revlog/index.rs Tue Dec 07 18:12:13 2021 +0000
@@ -9,6 +9,75 @@
pub const INDEX_ENTRY_SIZE: usize = 64;
+pub struct IndexHeader {
+ header_bytes: [u8; 4],
+}
+
+#[derive(Copy, Clone)]
+pub struct IndexHeaderFlags {
+ flags: u16,
+}
+
+/// Corresponds to the high bits of `_format_flags` in python
+impl IndexHeaderFlags {
+ /// Corresponds to FLAG_INLINE_DATA in python
+ pub fn is_inline(self) -> bool {
+ return self.flags & 1 != 0;
+ }
+ /// Corresponds to FLAG_GENERALDELTA in python
+ pub fn uses_generaldelta(self) -> bool {
+ return self.flags & 2 != 0;
+ }
+}
+
+/// Corresponds to the INDEX_HEADER structure,
+/// which is parsed as a `header` variable in `_loadindex` in `revlog.py`
+impl IndexHeader {
+ fn format_flags(&self) -> IndexHeaderFlags {
+ // No "unknown flags" check here, unlike in python. Maybe there should
+ // be.
+ return IndexHeaderFlags {
+ flags: BigEndian::read_u16(&self.header_bytes[0..2]),
+ };
+ }
+
+ /// The only revlog version currently supported by rhg.
+ const REVLOGV1: u16 = 1;
+
+ /// Corresponds to `_format_version` in Python.
+ fn format_version(&self) -> u16 {
+ return BigEndian::read_u16(&self.header_bytes[2..4]);
+ }
+
+ const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader {
+ // We treat an empty file as a valid index with no entries.
+ // Here we make an arbitrary choice of what we assume the format of the
+ // index to be (V1, using generaldelta).
+ // This doesn't matter too much, since we're only doing read-only
+ // access. but the value corresponds to the `new_header` variable in
+ // `revlog.py`, `_loadindex`
+ header_bytes: [0, 3, 0, 1],
+ };
+
+ fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> {
+ if index_bytes.len() == 0 {
+ return Ok(IndexHeader::EMPTY_INDEX_HEADER);
+ }
+ if index_bytes.len() < 4 {
+ return Err(HgError::corrupted(
+ "corrupted revlog: can't read the index format header",
+ ));
+ }
+ return Ok(IndexHeader {
+ header_bytes: {
+ let bytes: [u8; 4] =
+ index_bytes[0..4].try_into().expect("impossible");
+ bytes
+ },
+ });
+ }
+}
+
/// A Revlog index
pub struct Index {
bytes: Box<dyn Deref<Target = [u8]> + Send>,
@@ -23,7 +92,15 @@
pub fn new(
bytes: Box<dyn Deref<Target = [u8]> + Send>,
) -> Result<Self, HgError> {
- if is_inline(&bytes) {
+ let header = IndexHeader::parse(bytes.as_ref())?;
+
+ if header.format_version() != IndexHeader::REVLOGV1 {
+ // A proper new version should have had a repo/store
+ // requirement.
+ return Err(HgError::corrupted("unsupported revlog version"));
+ }
+
+ if header.format_flags().is_inline() {
let mut offset: usize = 0;
let mut offsets = Vec::new();
@@ -206,17 +283,6 @@
}
}
-/// Value of the inline flag.
-pub fn is_inline(index_bytes: &[u8]) -> bool {
- if index_bytes.len() < 4 {
- return true;
- }
- match &index_bytes[0..=1] {
- [0, 0] | [0, 2] => false,
- _ => true,
- }
-}
-
#[cfg(test)]
mod tests {
use super::*;
@@ -313,37 +379,60 @@
}
}
+ pub fn is_inline(index_bytes: &[u8]) -> bool {
+ IndexHeader::parse(index_bytes)
+ .expect("too short")
+ .format_flags()
+ .is_inline()
+ }
+
+ pub fn uses_generaldelta(index_bytes: &[u8]) -> bool {
+ IndexHeader::parse(index_bytes)
+ .expect("too short")
+ .format_flags()
+ .uses_generaldelta()
+ }
+
+ pub fn get_version(index_bytes: &[u8]) -> u16 {
+ IndexHeader::parse(index_bytes)
+ .expect("too short")
+ .format_version()
+ }
+
#[test]
- fn is_not_inline_when_no_inline_flag_test() {
+ fn flags_when_no_inline_flag_test() {
let bytes = IndexEntryBuilder::new()
.is_first(true)
.with_general_delta(false)
.with_inline(false)
.build();
- assert_eq!(is_inline(&bytes), false)
+ assert_eq!(is_inline(&bytes), false);
+ assert_eq!(uses_generaldelta(&bytes), false);
}
#[test]
- fn is_inline_when_inline_flag_test() {
+ fn flags_when_inline_flag_test() {
let bytes = IndexEntryBuilder::new()
.is_first(true)
.with_general_delta(false)
.with_inline(true)
.build();
- assert_eq!(is_inline(&bytes), true)
+ assert_eq!(is_inline(&bytes), true);
+ assert_eq!(uses_generaldelta(&bytes), false);
}
#[test]
- fn is_inline_when_inline_and_generaldelta_flags_test() {
+ fn flags_when_inline_and_generaldelta_flags_test() {
let bytes = IndexEntryBuilder::new()
.is_first(true)
.with_general_delta(true)
.with_inline(true)
.build();
- assert_eq!(is_inline(&bytes), true)
+ assert_eq!(is_inline(&bytes), true);
+ assert_eq!(uses_generaldelta(&bytes), true);
}
#[test]
@@ -400,6 +489,16 @@
assert_eq!(entry.base_revision(), 1)
}
+
+ #[test]
+ fn version_test() {
+ let bytes = IndexEntryBuilder::new()
+ .is_first(true)
+ .with_version(1)
+ .build();
+
+ assert_eq!(get_version(&bytes), 1)
+ }
}
#[cfg(test)]
--- a/rust/hg-core/src/revlog/revlog.rs Tue Dec 07 17:50:19 2021 +0000
+++ b/rust/hg-core/src/revlog/revlog.rs Tue Dec 07 18:12:13 2021 +0000
@@ -3,7 +3,6 @@
use std::ops::Deref;
use std::path::Path;
-use byteorder::{BigEndian, ByteOrder};
use flate2::read::ZlibDecoder;
use micro_timer::timed;
use sha1::{Digest, Sha1};
@@ -74,13 +73,6 @@
match repo.store_vfs().mmap_open_opt(&index_path)? {
None => Index::new(Box::new(vec![])),
Some(index_mmap) => {
- let version = get_version(&index_mmap)?;
- if version != 1 {
- // A proper new version should have had a repo/store
- // requirement.
- return Err(HgError::corrupted("corrupted revlog"));
- }
-
let index = Index::new(Box::new(index_mmap))?;
Ok(index)
}
@@ -387,19 +379,6 @@
}
}
-/// Format version of the revlog.
-pub fn get_version(index_bytes: &[u8]) -> Result<u16, HgError> {
- if index_bytes.len() == 0 {
- return Ok(1);
- };
- if index_bytes.len() < 4 {
- return Err(HgError::corrupted(
- "corrupted revlog: can't read the index format header",
- ));
- };
- Ok(BigEndian::read_u16(&index_bytes[2..=3]))
-}
-
/// Calculate the hash of a revision given its data and its parents.
fn hash(
data: &[u8],
@@ -418,20 +397,3 @@
hasher.update(data);
*hasher.finalize().as_ref()
}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- use super::super::index::IndexEntryBuilder;
-
- #[test]
- fn version_test() {
- let bytes = IndexEntryBuilder::new()
- .is_first(true)
- .with_version(1)
- .build();
-
- assert_eq!(get_version(&bytes).map_err(|_err| ()), Ok(1))
- }
-}