Mercurial > hg
changeset 48458:96ea4db4741b
rhg: fix a crash on non-generaldelta revlogs
Differential Revision: https://phab.mercurial-scm.org/D11882
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Tue, 07 Dec 2021 18:57:43 +0000 |
parents | 1fb3615dfce2 |
children | bc6547f61310 |
files | rust/hg-core/src/revlog/index.rs rust/hg-core/src/revlog/revlog.rs tests/test-rhg-no-generaldelta.t |
diffstat | 3 files changed, 80 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/rust/hg-core/src/revlog/index.rs Tue Dec 07 18:12:13 2021 +0000
+++ b/rust/hg-core/src/revlog/index.rs Tue Dec 07 18:57:43 2021 +0000
@@ -84,6 +84,7 @@
     /// Offsets of starts of index blocks.
     /// Only needed when the index is interleaved with data.
     offsets: Option<Vec<usize>>,
+    uses_generaldelta: bool,
 }
 
 impl Index {
@@ -100,6 +101,11 @@
             return Err(HgError::corrupted("unsupported revlog version"));
         }
 
+        // This is only correct because we know version is REVLOGV1.
+        // In v2 we always use generaldelta, while in v0 we never use
+        // generaldelta. Similar for [is_inline] (it's only used in v1).
+        let uses_generaldelta = header.format_flags().uses_generaldelta();
+
         if header.format_flags().is_inline() {
             let mut offset: usize = 0;
             let mut offsets = Vec::new();
@@ -119,6 +125,7 @@
                 Ok(Self {
                     bytes,
                     offsets: Some(offsets),
+                    uses_generaldelta,
                 })
             } else {
                 Err(HgError::corrupted("unexpected inline revlog length")
@@ -128,10 +135,15 @@
             Ok(Self {
                 bytes,
                 offsets: None,
+                uses_generaldelta,
             })
         }
     }
 
+    pub fn uses_generaldelta(&self) -> bool {
+        self.uses_generaldelta
+    }
+
     /// Value of the inline flag.
     pub fn is_inline(&self) -> bool {
         self.offsets.is_some()
@@ -259,7 +271,7 @@
     }
 
     /// Return the revision upon which the data has been derived.
-    pub fn base_revision(&self) -> Revision {
+    pub fn base_revision_or_base_of_delta_chain(&self) -> Revision {
         // TODO Maybe return an Option when base_revision == rev?
         // Requires to add rev to IndexEntry
 
@@ -297,7 +309,7 @@
         offset: usize,
         compressed_len: usize,
         uncompressed_len: usize,
-        base_revision: Revision,
+        base_revision_or_base_of_delta_chain: Revision,
     }
 
     #[cfg(test)]
@@ -311,7 +323,7 @@
                 offset: 0,
                 compressed_len: 0,
                 uncompressed_len: 0,
-                base_revision: 0,
+                base_revision_or_base_of_delta_chain: 0,
             }
         }
 
@@ -350,8 +362,11 @@
             self
         }
 
-        pub fn with_base_revision(&mut self, value: Revision) -> &mut Self {
-            self.base_revision = value;
+        pub fn with_base_revision_or_base_of_delta_chain(
+            &mut self,
+            value: Revision,
+        ) -> &mut Self {
+            self.base_revision_or_base_of_delta_chain = value;
             self
         }
 
@@ -374,7 +389,9 @@
             bytes.extend(&[0u8; 2]); // Revision flags.
             bytes.extend(&(self.compressed_len as u32).to_be_bytes());
             bytes.extend(&(self.uncompressed_len as u32).to_be_bytes());
-            bytes.extend(&self.base_revision.to_be_bytes());
+            bytes.extend(
+                &self.base_revision_or_base_of_delta_chain.to_be_bytes(),
+            );
             bytes
         }
     }
@@ -480,14 +497,16 @@
     }
 
     #[test]
-    fn test_base_revision() {
-        let bytes = IndexEntryBuilder::new().with_base_revision(1).build();
+    fn test_base_revision_or_base_of_delta_chain() {
+        let bytes = IndexEntryBuilder::new()
+            .with_base_revision_or_base_of_delta_chain(1)
+            .build();
         let entry = IndexEntry {
             bytes: &bytes,
             offset_override: None,
         };
 
-        assert_eq!(entry.base_revision(), 1)
+        assert_eq!(entry.base_revision_or_base_of_delta_chain(), 1)
     }
 
     #[test]
```
```diff
--- a/rust/hg-core/src/revlog/revlog.rs Tue Dec 07 18:12:13 2021 +0000
+++ b/rust/hg-core/src/revlog/revlog.rs Tue Dec 07 18:57:43 2021 +0000
@@ -191,11 +191,20 @@
         // Todo return -> Cow
         let mut entry = self.get_entry(rev)?;
         let mut delta_chain = vec![];
-        while let Some(base_rev) = entry.base_rev {
+
+        // The meaning of `base_rev_or_base_of_delta_chain` depends on
+        // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
+        // `mercurial/revlogutils/constants.py` and the code in
+        // [_chaininfo] and in [index_deltachain].
+        let uses_generaldelta = self.index.uses_generaldelta();
+        while let Some(base_rev) = entry.base_rev_or_base_of_delta_chain {
+            let base_rev = if uses_generaldelta {
+                base_rev
+            } else {
+                entry.rev - 1
+            };
             delta_chain.push(entry);
-            entry = self
-                .get_entry(base_rev)
-                .map_err(|_| RevlogError::corrupted())?;
+            entry = self.get_entry_internal(base_rev)?;
         }
 
         // TODO do not look twice in the index
@@ -291,14 +300,26 @@
             bytes: data,
             compressed_len: index_entry.compressed_len(),
             uncompressed_len: index_entry.uncompressed_len(),
-            base_rev: if index_entry.base_revision() == rev {
+            base_rev_or_base_of_delta_chain: if index_entry
+                .base_revision_or_base_of_delta_chain()
+                == rev
+            {
                 None
             } else {
-                Some(index_entry.base_revision())
+                Some(index_entry.base_revision_or_base_of_delta_chain())
             },
         };
         Ok(entry)
     }
+
+    /// when resolving internal references within revlog, any errors
+    /// should be reported as corruption, instead of e.g. "invalid revision"
+    fn get_entry_internal(
+        &self,
+        rev: Revision,
+    ) -> Result<RevlogEntry, RevlogError> {
+        return self.get_entry(rev).map_err(|_| RevlogError::corrupted());
+    }
 }
 
 /// The revlog entry's bytes and the necessary informations to extract
@@ -309,7 +330,7 @@
     bytes: &'a [u8],
     compressed_len: usize,
     uncompressed_len: usize,
-    base_rev: Option<Revision>,
+    base_rev_or_base_of_delta_chain: Option<Revision>,
 }
 
 impl<'a> RevlogEntry<'a> {
@@ -375,7 +396,7 @@
     /// Tell if the entry is a snapshot or a delta
     /// (influences on decompression).
     fn is_delta(&self) -> bool {
-        self.base_rev.is_some()
+        self.base_rev_or_base_of_delta_chain.is_some()
     }
 }
 
```
```diff
--- a/tests/test-rhg-no-generaldelta.t Tue Dec 07 18:12:13 2021 +0000
+++ b/tests/test-rhg-no-generaldelta.t Tue Dec 07 18:57:43 2021 +0000
@@ -1,4 +1,3 @@
-
   $ NO_FALLBACK="env RHG_ON_UNSUPPORTED=abort"
 
   $ cat << EOF >> $HGRCPATH
@@ -21,9 +20,27 @@
   1 0 prev
   2 1 prev
 
-rhg breaks on non-generaldelta revlogs:
+rhg works on non-generaldelta revlogs:
 
-  $ $NO_FALLBACK hg cat f -r . | f --sha256 --size
-  abort: corrupted revlog (rhg !)
-  size=0, sha256=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 (rhg !)
-  size=58, sha256=0cf0386dd4813cc3b957ea790146627dfc0ec42ad3fcf47221b9842e4d5764c1 (no-rhg !)
+  $ $NO_FALLBACK hg cat f -r .
+  1
+  2
+  3
+  4
+  5
+  6
+  7
+  8
+  9
+  10
+  11
+  12
+  13
+  14
+  15
+  16
+  17
+  18
+  19
+  20
+  footer
```