Mercurial > hg
changeset 50163:11661326b410
rhg: in path_encode, split Dest into VecDest and MeasureDest
Two separate types make the write semantics easier to understand
because we can consider the two sinks separately.
Having two independent compiled functions for size measurement
and for actual encoding seems likely to improve performance, too.
(and maybe we should get rid of measurement altogether)
Getting rid of [Dest] also removes the ugly option rewrapping
code, which is good.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Thu, 16 Feb 2023 19:00:56 +0000 |
parents | 6baea276a985 |
children | 8e50aa0db347 |
files | rust/hg-core/src/revlog/path_encode.rs |
diffstat | 1 files changed, 33 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/path_encode.rs Thu Feb 16 18:46:44 2023 +0000 +++ b/rust/hg-core/src/revlog/path_encode.rs Thu Feb 16 19:00:56 2023 +0000 @@ -77,45 +77,44 @@ } } -struct Dest<'a> { - dest: Option<&'a mut [u8]>, +struct VecDest { + buf: Vec<u8>, +} + +struct MeasureDest { pub len: usize, } -impl<'a> Dest<'a> { - pub fn create(buf: &'a mut [u8]) -> Dest<'a> { - Dest { - dest: Some(buf), - len: 0, +impl VecDest { + pub fn create(capacity : usize) -> Self { + Self { + buf: Vec::with_capacity(capacity), } } - - pub fn create_measure() -> Dest<'a> { - Dest { dest: None, len: 0 } - } } -fn rewrap_option<'a, 'b: 'a>( - x: &'a mut Option<&'b mut [u8]>, -) -> Option<&'a mut [u8]> { - match x { - None => None, - Some(y) => Some(y), +impl Sink for VecDest { + fn write_byte(&mut self, c: u8) { + self.buf.push(c) + } + + fn write_bytes(&mut self, src: &[u8]) { + self.buf.extend_from_slice(src) } } -impl<'a> Sink for Dest<'a> { - fn write_byte(&mut self, c: u8) { - if let Some(slice) = rewrap_option(&mut self.dest) { - slice[self.len] = c - } - self.len += 1 +impl MeasureDest { + fn create() -> Self { + Self { len: 0 } + } +} + +impl Sink for MeasureDest { + fn write_byte(&mut self, _c: u8) { + self.len += 1; } fn write_bytes(&mut self, src: &[u8]) { - if let Some(slice) = rewrap_option(&mut self.dest) { - slice[self.len..self.len + src.len()].copy_from_slice(src) - } self.len += src.len(); } } @@ -545,13 +544,13 @@ src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s) }; - let mut dest : DestArr<MAXSTOREPATHLEN> = DestArr::create(); + let mut dest : VecDest = VecDest::create(MAXSTOREPATHLEN); dest.write_bytes(b"dh/"); if let Some(last_slash) = last_slash { for slice in src[..last_slash].split(|b| *b == b'/') { let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)]; - if dest.len + slice.len() > maxshortdirslen + 3 { + if dest.buf.len() + slice.len() > maxshortdirslen + 3 { break; } else { dest.write_bytes(slice); @@ -560,7 +559,7 @@ } } - let used = dest.len + 40 + { + let used = dest.buf.len() + 40 + { if let Some(l) = last_dot { src.len() - l } else { @@ -589,7 +588,7 @@ if let Some(l) = last_dot { dest.write_bytes(&src[l..]); } - dest.contents().to_vec() + dest.buf } fn hash_encode(src: &[u8]) -> Vec<u8> { @@ -609,7 +608,7 @@ pub fn path_encode(path: &[u8]) -> Vec<u8> { let newlen = if path.len() <= MAXSTOREPATHLEN { - let mut measure = Dest::create_measure(); + let mut measure = MeasureDest::create(); basic_encode(&mut measure, path); measure.len } else { @@ -619,11 +618,10 @@ if newlen == path.len() { path.to_vec() } else { - let mut res = vec![0; newlen]; - let mut dest = Dest::create(&mut res); + let mut dest = VecDest::create(newlen); basic_encode(&mut dest, path); - assert!(dest.len == newlen); - res + assert!(dest.buf.len() == newlen); + dest.buf } } else { hash_encode(path)