comparison rust/hg-core/src/revlog/revlog.rs @ 48542:35c47015b9b7

rhg: Expose FilelogEntry that wraps RevlogEntry. This can later be extended to access metadata such as `uncompressed_len` without necessarily resolving deltas. Differential Revision: https://phab.mercurial-scm.org/D11961
author Simon Sapin <simon.sapin@octobus.net>
date Tue, 21 Dec 2021 18:50:44 +0100
parents f2f57724d4eb
children 0a4ac916673e
comparison
equal deleted inserted replaced
48541:f2f57724d4eb 48542:35c47015b9b7
37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(), 37 NodeMapError::RevisionNotInIndex(_) => RevlogError::corrupted(),
38 } 38 }
39 } 39 }
40 } 40 }
41 41
42 fn corrupted() -> HgError {
43 HgError::corrupted("corrupted revlog")
44 }
45
42 impl RevlogError { 46 impl RevlogError {
43 fn corrupted() -> Self { 47 fn corrupted() -> Self {
44 RevlogError::Other(HgError::corrupted("corrupted revlog")) 48 RevlogError::Other(corrupted())
45 } 49 }
46 } 50 }
47 51
48 /// Read only implementation of revlog. 52 /// Read only implementation of revlog.
49 pub struct Revlog { 53 pub struct Revlog {
189 rev: Revision, 193 rev: Revision,
190 ) -> Result<Cow<[u8]>, RevlogError> { 194 ) -> Result<Cow<[u8]>, RevlogError> {
191 if rev == NULL_REVISION { 195 if rev == NULL_REVISION {
192 return Ok(Cow::Borrowed(&[])); 196 return Ok(Cow::Borrowed(&[]));
193 }; 197 };
194 self.get_entry(rev)?.data() 198 Ok(self.get_entry(rev)?.data()?)
195 } 199 }
196 200
197 /// Check the hash of some given data against the recorded hash. 201 /// Check the hash of some given data against the recorded hash.
198 pub fn check_hash( 202 pub fn check_hash(
199 &self, 203 &self,
220 /// and its deltas. 224 /// and its deltas.
221 #[timed] 225 #[timed]
222 fn build_data_from_deltas( 226 fn build_data_from_deltas(
223 snapshot: RevlogEntry, 227 snapshot: RevlogEntry,
224 deltas: &[RevlogEntry], 228 deltas: &[RevlogEntry],
225 ) -> Result<Vec<u8>, RevlogError> { 229 ) -> Result<Vec<u8>, HgError> {
226 let snapshot = snapshot.data_chunk()?; 230 let snapshot = snapshot.data_chunk()?;
227 let deltas = deltas 231 let deltas = deltas
228 .iter() 232 .iter()
229 .rev() 233 .rev()
230 .map(RevlogEntry::data_chunk) 234 .map(RevlogEntry::data_chunk)
231 .collect::<Result<Vec<Cow<'_, [u8]>>, RevlogError>>()?; 235 .collect::<Result<Vec<_>, _>>()?;
232 let patches: Vec<_> = 236 let patches: Vec<_> =
233 deltas.iter().map(|d| patch::PatchList::new(d)).collect(); 237 deltas.iter().map(|d| patch::PatchList::new(d)).collect();
234 let patch = patch::fold_patch_lists(&patches); 238 let patch = patch::fold_patch_lists(&patches);
235 Ok(patch.apply(&snapshot)) 239 Ok(patch.apply(&snapshot))
236 } 240 }
244 ), 248 ),
245 } 249 }
246 } 250 }
247 251
248 /// Get an entry of the revlog. 252 /// Get an entry of the revlog.
249 fn get_entry(&self, rev: Revision) -> Result<RevlogEntry, RevlogError> { 253 pub fn get_entry(
254 &self,
255 rev: Revision,
256 ) -> Result<RevlogEntry, RevlogError> {
250 let index_entry = self 257 let index_entry = self
251 .index 258 .index
252 .get_entry(rev) 259 .get_entry(rev)
253 .ok_or(RevlogError::InvalidRevision)?; 260 .ok_or(RevlogError::InvalidRevision)?;
254 let start = index_entry.offset(); 261 let start = index_entry.offset();
279 /// when resolving internal references within revlog, any errors 286 /// when resolving internal references within revlog, any errors
280 /// should be reported as corruption, instead of e.g. "invalid revision" 287 /// should be reported as corruption, instead of e.g. "invalid revision"
281 fn get_entry_internal( 288 fn get_entry_internal(
282 &self, 289 &self,
283 rev: Revision, 290 rev: Revision,
284 ) -> Result<RevlogEntry, RevlogError> { 291 ) -> Result<RevlogEntry, HgError> {
285 return self.get_entry(rev).map_err(|_| RevlogError::corrupted()); 292 return self.get_entry(rev).map_err(|_| corrupted());
286 } 293 }
287 } 294 }
288 295
289 /// The revlog entry's bytes and the necessary information to extract 296 /// The revlog entry's bytes and the necessary information to extract
290 /// the entry's data. 297 /// the entry's data.
302 pub fn revision(&self) -> Revision { 309 pub fn revision(&self) -> Revision {
303 self.rev 310 self.rev
304 } 311 }
305 312
306 /// The data for this entry, after resolving deltas if any. 313 /// The data for this entry, after resolving deltas if any.
307 pub fn data(&self) -> Result<Cow<'a, [u8]>, RevlogError> { 314 pub fn data(&self) -> Result<Cow<'a, [u8]>, HgError> {
308 let mut entry = self.clone(); 315 let mut entry = self.clone();
309 let mut delta_chain = vec![]; 316 let mut delta_chain = vec![];
310 317
311 // The meaning of `base_rev_or_base_of_delta_chain` depends on 318 // The meaning of `base_rev_or_base_of_delta_chain` depends on
312 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in 319 // generaldelta. See the doc on `ENTRY_DELTA_BASE` in
326 // TODO do not look twice in the index 333 // TODO do not look twice in the index
327 let index_entry = self 334 let index_entry = self
328 .revlog 335 .revlog
329 .index 336 .index
330 .get_entry(self.rev) 337 .get_entry(self.rev)
331 .ok_or(RevlogError::InvalidRevision)?; 338 .ok_or_else(corrupted)?;
332 339
333 let data = if delta_chain.is_empty() { 340 let data = if delta_chain.is_empty() {
334 entry.data_chunk()? 341 entry.data_chunk()?
335 } else { 342 } else {
336 Revlog::build_data_from_deltas(entry, &delta_chain)?.into() 343 Revlog::build_data_from_deltas(entry, &delta_chain)?.into()
342 index_entry.hash().as_bytes(), 349 index_entry.hash().as_bytes(),
343 &data, 350 &data,
344 ) { 351 ) {
345 Ok(data) 352 Ok(data)
346 } else { 353 } else {
347 Err(RevlogError::corrupted()) 354 Err(corrupted())
348 } 355 }
349 } 356 }
350 357
351 /// Extract the data contained in the entry. 358 /// Extract the data contained in the entry.
352 /// This may be a delta. (See `is_delta`.) 359 /// This may be a delta. (See `is_delta`.)
353 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, RevlogError> { 360 fn data_chunk(&self) -> Result<Cow<'a, [u8]>, HgError> {
354 if self.bytes.is_empty() { 361 if self.bytes.is_empty() {
355 return Ok(Cow::Borrowed(&[])); 362 return Ok(Cow::Borrowed(&[]));
356 } 363 }
357 match self.bytes[0] { 364 match self.bytes[0] {
358 // Revision data is the entirety of the entry, including this 365 // Revision data is the entirety of the entry, including this
363 // zlib (RFC 1950) data. 370 // zlib (RFC 1950) data.
364 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)), 371 b'x' => Ok(Cow::Owned(self.uncompressed_zlib_data()?)),
365 // zstd data. 372 // zstd data.
366 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)), 373 b'\x28' => Ok(Cow::Owned(self.uncompressed_zstd_data()?)),
367 // A proper new format should have had a repo/store requirement. 374 // A proper new format should have had a repo/store requirement.
368 _format_type => Err(RevlogError::corrupted()), 375 _format_type => Err(corrupted()),
369 } 376 }
370 } 377 }
371 378
372 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, RevlogError> { 379 fn uncompressed_zlib_data(&self) -> Result<Vec<u8>, HgError> {
373 let mut decoder = ZlibDecoder::new(self.bytes); 380 let mut decoder = ZlibDecoder::new(self.bytes);
374 if self.is_delta() { 381 if self.is_delta() {
375 let mut buf = Vec::with_capacity(self.compressed_len); 382 let mut buf = Vec::with_capacity(self.compressed_len);
376 decoder 383 decoder.read_to_end(&mut buf).map_err(|_| corrupted())?;
377 .read_to_end(&mut buf)
378 .map_err(|_| RevlogError::corrupted())?;
379 Ok(buf) 384 Ok(buf)
380 } else { 385 } else {
381 let mut buf = vec![0; self.uncompressed_len]; 386 let mut buf = vec![0; self.uncompressed_len];
382 decoder 387 decoder.read_exact(&mut buf).map_err(|_| corrupted())?;
383 .read_exact(&mut buf)
384 .map_err(|_| RevlogError::corrupted())?;
385 Ok(buf) 388 Ok(buf)
386 } 389 }
387 } 390 }
388 391
389 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, RevlogError> { 392 fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
390 if self.is_delta() { 393 if self.is_delta() {
391 let mut buf = Vec::with_capacity(self.compressed_len); 394 let mut buf = Vec::with_capacity(self.compressed_len);
392 zstd::stream::copy_decode(self.bytes, &mut buf) 395 zstd::stream::copy_decode(self.bytes, &mut buf)
393 .map_err(|_| RevlogError::corrupted())?; 396 .map_err(|_| corrupted())?;
394 Ok(buf) 397 Ok(buf)
395 } else { 398 } else {
396 let mut buf = vec![0; self.uncompressed_len]; 399 let mut buf = vec![0; self.uncompressed_len];
397 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf) 400 let len = zstd::block::decompress_to_buffer(self.bytes, &mut buf)
398 .map_err(|_| RevlogError::corrupted())?; 401 .map_err(|_| corrupted())?;
399 if len != self.uncompressed_len { 402 if len != self.uncompressed_len {
400 Err(RevlogError::corrupted()) 403 Err(corrupted())
401 } else { 404 } else {
402 Ok(buf) 405 Ok(buf)
403 } 406 }
404 } 407 }
405 } 408 }