changeset 50508:39ed7b2953bb
rust: mostly avoid streaming zstd decompression
Streaming zstd decompression seems slightly slower, and the API we use makes it very inconvenient to reuse the decompression context. Instead, use the buffer-backed version, since we can give a reasonable estimate of the decompressed size.
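To make the trade-off concrete, here is a minimal sketch (not hg-core's actual helper) of the two approaches using the `zstd` crate: `zstd::stream::copy_decode` builds a fresh streaming decoder on every call, while `zstd::bulk::Decompressor` is a decompression context the caller can keep and reuse, writing into a pre-sized buffer. The function names and the `estimated_len` parameter are hypothetical.

```rust
use std::io;

// Streaming path: each call sets up a new decode stream, so the
// decompression context is rebuilt every time.
fn decompress_streaming(compressed: &[u8]) -> io::Result<Vec<u8>> {
    let mut out = Vec::new();
    zstd::stream::copy_decode(compressed, &mut out)?;
    Ok(out)
}

// Buffer-backed path: the caller owns one `Decompressor` (the reusable
// context) and we decompress into the spare capacity of a pre-sized `Vec`.
// With the bulk API this fails if `estimated_len` is too small.
fn decompress_buffered(
    decompressor: &mut zstd::bulk::Decompressor<'_>,
    compressed: &[u8],
    estimated_len: usize,
) -> io::Result<Vec<u8>> {
    let mut out = Vec::with_capacity(estimated_len);
    decompressor.decompress_to_buffer(compressed, &mut out)?;
    Ok(out)
}
```

A caller can create the `Decompressor` once and feed it every chunk in turn, which is exactly the reuse that the streaming helper makes awkward.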
| author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
|---|---|
| date | Thu, 18 May 2023 17:53:17 +0100 |
| parents | d1cab48354bc |
| children | 1b73868d17cf |
| files | rust/hg-core/src/revlog/mod.rs |
| diffstat | 1 files changed, 17 insertions(+), 4 deletions(-) |
```diff
--- a/rust/hg-core/src/revlog/mod.rs	Thu May 18 17:25:18 2023 +0100
+++ b/rust/hg-core/src/revlog/mod.rs	Thu May 18 17:53:17 2023 +0100
@@ -596,13 +596,26 @@
     }
 
     fn uncompressed_zstd_data(&self) -> Result<Vec<u8>, HgError> {
+        let cap = self.uncompressed_len.max(0) as usize;
         if self.is_delta() {
-            let mut buf = Vec::with_capacity(self.compressed_len as usize);
-            zstd::stream::copy_decode(self.bytes, &mut buf)
-                .map_err(|e| corrupted(e.to_string()))?;
+            // [cap] is usually an over-estimate of the space needed because
+            // it's the length of delta-decoded data, but we're interested
+            // in the size of the delta.
+            // This means we have to [shrink_to_fit] to avoid holding on
+            // to a large chunk of memory, but it also means we must have a
+            // fallback branch, for the case when the delta is longer than
+            // the original data (surprisingly, this does happen in practice)
+            let mut buf = Vec::with_capacity(cap);
+            match zstd_decompress_to_buffer(self.bytes, &mut buf) {
+                Ok(_) => buf.shrink_to_fit(),
+                Err(_) => {
+                    buf.clear();
+                    zstd::stream::copy_decode(self.bytes, &mut buf)
+                        .map_err(|e| corrupted(e.to_string()))?;
+                }
+            };
             Ok(buf)
         } else {
-            let cap = self.uncompressed_len.max(0) as usize;
             let mut buf = Vec::with_capacity(cap);
             let len = zstd_decompress_to_buffer(self.bytes, &mut buf)
                 .map_err(|e| corrupted(e.to_string()))?;
```
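For readers who want the pattern outside the revlog context, below is a self-contained sketch of the logic this hunk introduces. The `zstd_decompress_to_buffer` stand-in is hypothetical and built on the `zstd` crate's bulk API; hg-core's own helper may be implemented differently, and error handling is simplified to `io::Result` instead of `HgError`.

```rust
use std::io;

// Hypothetical stand-in for hg-core's `zstd_decompress_to_buffer`:
// decompress into the spare capacity of `buf`, failing if it is too small.
fn zstd_decompress_to_buffer(bytes: &[u8], buf: &mut Vec<u8>) -> io::Result<usize> {
    zstd::bulk::Decompressor::new()?.decompress_to_buffer(bytes, buf)
}

fn uncompressed_zstd_delta(bytes: &[u8], uncompressed_len: i32) -> io::Result<Vec<u8>> {
    // `uncompressed_len` is the length of the fully delta-decoded data, so
    // for a delta chunk it usually over-estimates the space actually needed.
    let cap = uncompressed_len.max(0) as usize;
    let mut buf = Vec::with_capacity(cap);
    match zstd_decompress_to_buffer(bytes, &mut buf) {
        // The estimate was large enough: give the unused capacity back.
        Ok(_) => buf.shrink_to_fit(),
        // The delta turned out longer than the estimate (it happens in
        // practice): fall back to the streaming decoder, which grows the
        // buffer as needed.
        Err(_) => {
            buf.clear();
            zstd::stream::copy_decode(bytes, &mut buf)?;
        }
    }
    Ok(buf)
}
```

Shrinking on success matters because the over-estimate could otherwise keep a large allocation alive for as long as the returned data is held.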