# HG changeset patch
# User Raphaël Gomès
# Date 1728549515 -7200
# Node ID 44823c5011fe7abf3f29575563519c6aad77baed
# Parent  f2eab4967bfcb3016a635f3b980006e5b5e8408a
rust-index: fix the computation of data start

The previous computation only happened to fall into place rather than being
correct. We clean up the logic by differentiating more cleanly between the
on-disk offset and the actual start of the data.

diff -r f2eab4967bfc -r 44823c5011fe rust/hg-core/src/revlog/index.rs
--- a/rust/hg-core/src/revlog/index.rs	Thu Oct 10 10:38:10 2024 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Thu Oct 10 10:38:35 2024 +0200
@@ -74,7 +74,7 @@
                     index_bytes[0..4].try_into().expect("impossible");
                 bytes
             },
-        }))
+        })
     }
 }
 
@@ -925,8 +925,8 @@
         let mut gaps = Vec::new();
         let mut previous_end = None;
 
-        for (i, (_rev, entry)) in entries.iter().enumerate() {
-            let start = entry.c_start() as usize;
+        for (i, (rev, entry)) in entries.iter().enumerate() {
+            let start = self.start(*rev, entry);
             let length = entry.compressed_len();
 
             // Skip empty revisions to form larger holes
@@ -1004,15 +1004,14 @@
         if revs.is_empty() {
             return 0;
         }
-        let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
-        let end = last_entry.c_start() + last_entry.compressed_len() as u64;
+        let last_rev = revs[revs.len() - 1];
+        let last_entry = &self.get_entry(last_rev).unwrap();
+        let end = self.start(last_rev, last_entry)
+            + last_entry.compressed_len() as usize;
         let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
-        let start = if first_rev.0 == 0 {
-            0
-        } else {
-            self.get_entry(*first_rev).unwrap().c_start()
-        };
-        (end - start) as usize
+        let first_entry = self.get_entry(*first_rev).unwrap();
+        let start = self.start(*first_rev, &first_entry);
+        end - start
     }
 
     /// Returns `&revs[startidx..endidx]` without empty trailing revs
@@ -1379,6 +1378,25 @@
             })
             .collect())
     }
+
+    /// Return the offset into the data corresponding to `rev` (in the index
+    /// file if inline, in the data file otherwise). `entry` must be the entry
+    /// for `rev`: the API is done this way to reduce the number of lookups
+    /// since we sometimes already have the entry, and because very few places
+    /// actually use this function.
+    #[inline(always)]
+    pub fn start(&self, rev: Revision, entry: &IndexEntry<'_>) -> usize {
+        #[cfg(debug_assertions)]
+        {
+            assert_eq!(&self.get_entry(rev).unwrap(), entry);
+        }
+        let offset = entry.offset();
+        if self.is_inline() {
+            offset + ((rev.0 as usize + 1) * INDEX_ENTRY_SIZE)
+        } else {
+            offset
+        }
+    }
 }
 
 /// The kind of functionality needed by find_gca_candidates
@@ -1692,7 +1710,7 @@
     }
 }
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct IndexEntry<'a> {
     bytes: &'a [u8],
 }
@@ -1708,11 +1726,6 @@
         BigEndian::read_u64(&self.bytes[0..8])
     }
 
-    /// Same result (except potentially for rev 0) as C `index_get_start()`
-    fn c_start(&self) -> u64 {
-        self.raw_offset() >> 16
-    }
-
    pub fn flags(&self) -> u16 {
        BigEndian::read_u16(&self.bytes[6..=7])
    }
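
Note (not part of the patch): below is a minimal, self-contained sketch of the
rule the new `Index::start()` helper encodes, for illustration only. The free
function `data_start` and the 64-byte entry size are assumptions of this
sketch (mirroring revlog v1), not the hg-core API. In an inline revlog the
data of each revision is interleaved with the index entries, so every entry up
to and including `rev` has to be skipped before the stored offset applies; for
a separate data file the stored offset is already the data start.

    // Illustrative sketch only; names and the 64-byte entry size are
    // assumptions, not the hg-core API.
    const INDEX_ENTRY_SIZE: usize = 64;

    /// Where the (compressed) data for `rev` starts, given the offset stored
    /// in its index entry. Inline revlogs interleave data with index entries;
    /// non-inline revlogs keep data in a separate file, so the stored offset
    /// is used as-is.
    fn data_start(rev: usize, stored_offset: usize, inline: bool) -> usize {
        if inline {
            stored_offset + (rev + 1) * INDEX_ENTRY_SIZE
        } else {
            stored_offset
        }
    }

    fn main() {
        // Non-inline: the stored offset is already the answer.
        assert_eq!(data_start(3, 1024, false), 1024);
        // Inline: rev 0's data starts right after its own 64-byte index
        // entry, even though its stored offset is 0.
        assert_eq!(data_start(0, 0, true), INDEX_ENTRY_SIZE);
    }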