Mercurial > hg
changeset 50159:96d31efd21f7
rhg: reduce verbosity in path_encode by using a trait for writing
Hopefully this makes the code easier to read and understand, and
shorter overall.
It also lets us later change the type we use as a [Sink]
without having to change the encoding functions, for example by using
two different types: one for size measurement and one for the actual
serialization.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Thu, 16 Feb 2023 18:29:52 +0000 |
parents | 0d2ec486d95c |
children | 5ce53ff6133a |
files | rust/hg-core/src/revlog/path_encode.rs |
diffstat | 1 files changed, 133 insertions(+), 154 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/path_encode.rs Thu Feb 16 16:20:17 2023 +0000 +++ b/rust/hg-core/src/revlog/path_encode.rs Thu Feb 16 18:29:52 2023 +0000 @@ -36,22 +36,31 @@ DDEFAULT, } +trait Sink { + fn write_byte(&mut self, c: u8); + fn write_bytes(&mut self, c: &[u8]); +} + fn inset(bitset: &[u32; 8], c: u8) -> bool { bitset[(c as usize) >> 5] & (1 << (c & 31)) != 0 } -fn charcopy(dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) { - if let Some(slice) = dest { - slice[*destlen] = c - } - *destlen += 1 +struct Dest<'a> { + dest: Option<&'a mut [u8]>, + pub len: usize, } -fn memcopy(dest: Option<&mut [u8]>, destlen: &mut usize, src: &[u8]) { - if let Some(slice) = dest { - slice[*destlen..*destlen + src.len()].copy_from_slice(src) +impl<'a> Dest<'a> { + pub fn create(buf: &'a mut [u8]) -> Dest<'a> { + Dest { + dest: Some(buf), + len: 0, + } } - *destlen += src.len(); + + pub fn create_measure() -> Dest<'a> { + Dest { dest: None, len: 0 } + } } fn rewrap_option<'a, 'b: 'a>( @@ -63,38 +72,49 @@ } } -fn hexencode(mut dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) { +impl<'a> Sink for Dest<'a> { + fn write_byte(&mut self, c: u8) { + if let Some(slice) = rewrap_option(&mut self.dest) { + slice[self.len] = c + } + self.len += 1 + } + + fn write_bytes(&mut self, src: &[u8]) { + if let Some(slice) = rewrap_option(&mut self.dest) { + slice[self.len..self.len + src.len()].copy_from_slice(src) + } + self.len += src.len(); + } +} + +fn hexencode(dest: &mut impl Sink, c: u8) { let hexdigit = b"0123456789abcdef"; - charcopy( - rewrap_option(&mut dest), - destlen, - hexdigit[(c as usize) >> 4], - ); - charcopy(dest, destlen, hexdigit[(c as usize) & 15]); + dest.write_byte(hexdigit[(c as usize) >> 4]); + dest.write_byte(hexdigit[(c as usize) & 15]); } /* 3-byte escape: tilde followed by two hex digits */ -fn escape3(mut dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) { - charcopy(rewrap_option(&mut dest), destlen, b'~'); - hexencode(dest, destlen, c); +fn 
escape3(dest: &mut impl Sink, c: u8) { + dest.write_byte(b'~'); + hexencode(dest, c); } -fn encode_dir(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize { +fn encode_dir(dest: &mut impl Sink, src: &[u8]) { let mut state = dir_state::DDEFAULT; let mut i = 0; - let mut destlen = 0; while i < src.len() { match state { dir_state::DDOT => match src[i] { b'd' | b'i' => { state = dir_state::DHGDI; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } b'h' => { state = dir_state::DH; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } _ => { @@ -104,7 +124,7 @@ dir_state::DH => { if src[i] == b'g' { state = dir_state::DHGDI; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } else { state = dir_state::DDEFAULT; @@ -112,8 +132,8 @@ } dir_state::DHGDI => { if src[i] == b'/' { - memcopy(rewrap_option(&mut dest), &mut destlen, b".hg"); - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_bytes(b".hg"); + dest.write_byte(src[i]); i += 1; } state = dir_state::DDEFAULT; @@ -122,66 +142,64 @@ if src[i] == b'.' { state = dir_state::DDOT } - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } } } - destlen } fn _encode( twobytes: &[u32; 8], onebyte: &[u32; 8], - mut dest: Option<&mut [u8]>, + dest: &mut impl Sink, src: &[u8], encodedir: bool, -) -> usize { +) { let mut state = path_state::START; let mut i = 0; - let mut destlen = 0; let len = src.len(); while i < len { match state { path_state::START => match src[i] { b'/' => { - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } b'.' 
=> { state = path_state::LDOT; - escape3(rewrap_option(&mut dest), &mut destlen, src[i]); + escape3(dest, src[i]); i += 1; } b' ' => { state = path_state::DEFAULT; - escape3(rewrap_option(&mut dest), &mut destlen, src[i]); + escape3(dest, src[i]); i += 1; } b'a' => { state = path_state::A; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } b'c' => { state = path_state::C; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } b'l' => { state = path_state::L; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } b'n' => { state = path_state::N; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } b'p' => { state = path_state::P; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } _ => { @@ -191,7 +209,7 @@ path_state::A => { if src[i] == b'u' { state = path_state::AU; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } else { state = path_state::DEFAULT; @@ -208,18 +226,14 @@ path_state::THIRD => { state = path_state::DEFAULT; match src[i] { - b'.' | b'/' | b'\0' => escape3( - rewrap_option(&mut dest), - &mut destlen, - src[i - 1], - ), + b'.' | b'/' | b'\0' => escape3(dest, src[i - 1]), _ => i -= 1, } } path_state::C => { if src[i] == b'o' { state = path_state::CO; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } else { state = path_state::DEFAULT; @@ -242,41 +256,25 @@ i += 1; } else { state = path_state::DEFAULT; - charcopy( - rewrap_option(&mut dest), - &mut destlen, - src[i - 1], - ); + dest.write_byte(src[i - 1]); } } path_state::COMLPTn => { state = path_state::DEFAULT; match src[i] { b'.' 
| b'/' | b'\0' => { - escape3( - rewrap_option(&mut dest), - &mut destlen, - src[i - 2], - ); - charcopy( - rewrap_option(&mut dest), - &mut destlen, - src[i - 1], - ); + escape3(dest, src[i - 2]); + dest.write_byte(src[i - 1]); } _ => { - memcopy( - rewrap_option(&mut dest), - &mut destlen, - &src[i - 2..i], - ); + dest.write_bytes(&src[i - 2..i]); } } } path_state::L => { if src[i] == b'p' { state = path_state::LP; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } else { state = path_state::DEFAULT; @@ -293,7 +291,7 @@ path_state::N => { if src[i] == b'u' { state = path_state::NU; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } else { state = path_state::DEFAULT; @@ -310,7 +308,7 @@ path_state::P => { if src[i] == b'r' { state = path_state::PR; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } else { state = path_state::DEFAULT; @@ -327,12 +325,12 @@ path_state::LDOT => match src[i] { b'd' | b'i' => { state = path_state::HGDI; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } b'h' => { state = path_state::H; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } _ => { @@ -342,30 +340,30 @@ path_state::DOT => match src[i] { b'/' | b'\0' => { state = path_state::START; - memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e"); - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_bytes(b"~2e"); + dest.write_byte(src[i]); i += 1; } b'd' | b'i' => { state = path_state::HGDI; - charcopy(rewrap_option(&mut dest), &mut destlen, b'.'); - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(b'.'); + dest.write_byte(src[i]); i += 1; } b'h' => { state = path_state::H; - memcopy(rewrap_option(&mut dest), &mut destlen, b".h"); + dest.write_bytes(b".h"); i += 1; } _ => { state = path_state::DEFAULT; - 
charcopy(rewrap_option(&mut dest), &mut destlen, b'.'); + dest.write_byte(b'.'); } }, path_state::H => { if src[i] == b'g' { state = path_state::HGDI; - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } else { state = path_state::DEFAULT; @@ -375,13 +373,9 @@ if src[i] == b'/' { state = path_state::START; if encodedir { - memcopy( - rewrap_option(&mut dest), - &mut destlen, - b".hg", - ); + dest.write_bytes(b".hg"); } - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1 } else { state = path_state::DEFAULT; @@ -390,18 +384,18 @@ path_state::SPACE => match src[i] { b'/' | b'\0' => { state = path_state::START; - memcopy(rewrap_option(&mut dest), &mut destlen, b"~20"); - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_bytes(b"~20"); + dest.write_byte(src[i]); i += 1; } _ => { state = path_state::DEFAULT; - charcopy(rewrap_option(&mut dest), &mut destlen, b' '); + dest.write_byte(b' '); } }, path_state::DEFAULT => { while i != len && inset(onebyte, src[i]) { - charcopy(rewrap_option(&mut dest), &mut destlen, src[i]); + dest.write_byte(src[i]); i += 1; } if i == len { @@ -418,17 +412,13 @@ } b'/' => { state = path_state::START; - charcopy(rewrap_option(&mut dest), &mut destlen, b'/'); + dest.write_byte(b'/'); i += 1; } _ => { if inset(onebyte, src[i]) { loop { - charcopy( - rewrap_option(&mut dest), - &mut destlen, - src[i], - ); + dest.write_byte(src[i]); i += 1; if !(i < len && inset(onebyte, src[i])) { break; @@ -437,22 +427,14 @@ } else if inset(twobytes, src[i]) { let c = src[i]; i += 1; - charcopy( - rewrap_option(&mut dest), - &mut destlen, - b'_', - ); - charcopy( - rewrap_option(&mut dest), - &mut destlen, - if c == b'_' { b'_' } else { c + 32 }, - ); + dest.write_byte(b'_'); + dest.write_byte(if c == b'_' { + b'_' + } else { + c + 32 + }); } else { - escape3( - rewrap_option(&mut dest), - &mut destlen, - src[i], - ); + escape3(dest, src[i]); i += 1; } } 
@@ -464,17 +446,13 @@ path_state::START => (), path_state::A => (), path_state::AU => (), - path_state::THIRD => { - escape3(rewrap_option(&mut dest), &mut destlen, src[i - 1]) - } + path_state::THIRD => escape3(dest, src[i - 1]), path_state::C => (), path_state::CO => (), - path_state::COMLPT => { - charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1]) - } + path_state::COMLPT => dest.write_byte(src[i - 1]), path_state::COMLPTn => { - escape3(rewrap_option(&mut dest), &mut destlen, src[i - 2]); - charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1]); + escape3(dest, src[i - 2]); + dest.write_byte(src[i - 1]); } path_state::L => (), path_state::LP => (), @@ -484,19 +462,18 @@ path_state::PR => (), path_state::LDOT => (), path_state::DOT => { - memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e"); + dest.write_bytes(b"~2e"); } path_state::H => (), path_state::HGDI => (), path_state::SPACE => { - memcopy(rewrap_option(&mut dest), &mut destlen, b"~20"); + dest.write_bytes(b"~20"); } path_state::DEFAULT => (), - }; - destlen + } } -fn basic_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize { +fn basic_encode(dest: &mut impl Sink, src: &[u8]) { let twobytes: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0]; let onebyte: [u32; 8] = [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0]; @@ -505,24 +482,22 @@ const MAXSTOREPATHLEN: usize = 120; -fn lower_encode(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize { +fn lower_encode(dest: &mut impl Sink, src: &[u8]) { let onebyte: [u32; 8] = [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0]; let lower: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0]; - let mut destlen = 0; for c in src { if inset(&onebyte, *c) { - charcopy(rewrap_option(&mut dest), &mut destlen, *c) + dest.write_byte(*c) } else if inset(&lower, *c) { - charcopy(rewrap_option(&mut dest), &mut destlen, *c + 32) + dest.write_byte(*c + 32) } else { - escape3(rewrap_option(&mut dest), &mut destlen, *c) + escape3(dest, *c) } } - destlen } -fn 
aux_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize { +fn aux_encode(dest: &mut impl Sink, src: &[u8]) { let twobytes = [0; 8]; let onebyte: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0]; _encode(&twobytes, &onebyte, dest, src, false) @@ -531,7 +506,6 @@ fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> { let dirprefixlen = 8; let maxshortdirslen = 68; - let mut destlen = 0; let last_slash = src.iter().rposition(|b| *b == b'/'); let last_dot: Option<usize> = { @@ -539,25 +513,23 @@ src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s) }; - let mut dest = vec![0; MAXSTOREPATHLEN]; - memcopy(Some(&mut dest), &mut destlen, b"dh/"); + let mut dest_vec = vec![0; MAXSTOREPATHLEN]; + let mut dest = Dest::create(&mut dest_vec); + dest.write_bytes(b"dh/"); if let Some(last_slash) = last_slash { for slice in src[..last_slash].split(|b| *b == b'/') { let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)]; - if destlen + slice.len() > maxshortdirslen + 3 { + if dest.len + slice.len() > maxshortdirslen + 3 { break; } else { - memcopy(Some(&mut dest), &mut destlen, slice); - if dest[destlen - 1] == b'.' 
|| dest[destlen - 1] == b' ' { - dest[destlen - 1] = b'_' - } + dest.write_bytes(slice); } - charcopy(Some(&mut dest), &mut destlen, b'/'); + dest.write_byte(b'/'); } } - let used = destlen + 40 + { + let used = dest.len + 40 + { if let Some(l) = last_dot { src.len() - l } else { @@ -577,46 +549,51 @@ Some(l) => l + 1, None => 0, }; - memcopy( - Some(&mut dest), - &mut destlen, - &src[start..][..basenamelen], - ) + dest.write_bytes(&src[start..][..basenamelen]) } } for c in sha { - hexencode(Some(&mut dest), &mut destlen, *c); + hexencode(&mut dest, *c); } if let Some(l) = last_dot { - memcopy(Some(&mut dest), &mut destlen, &src[l..]); + dest.write_bytes(&src[l..]); } - if destlen == dest.len() { - dest + let destlen = dest.len; + if destlen == dest_vec.len() { + dest_vec } else { // sometimes the path are shorter than MAXSTOREPATHLEN - dest[..destlen].to_vec() + dest_vec[..destlen].to_vec() } } const MAXENCODE: usize = 4096 * 4; fn hash_encode(src: &[u8]) -> Vec<u8> { let dired = &mut [0; MAXENCODE]; + let mut dired_dest = Dest::create(dired); let lowered = &mut [0; MAXENCODE]; + let mut lowered_dest = Dest::create(lowered); let auxed = &mut [0; MAXENCODE]; + let mut auxed_dest = Dest::create(auxed); let baselen = (src.len() - 5) * 3; if baselen >= MAXENCODE { panic!("path_encode::hash_encore: string too long: {}", baselen) }; - let dirlen = encode_dir(Some(&mut dired[..]), src); + encode_dir(&mut dired_dest, src); + let dirlen = dired_dest.len; let sha = Sha1::digest(&dired[..dirlen]); - let lowerlen = lower_encode(Some(&mut lowered[..]), &dired[..dirlen][5..]); - let auxlen = aux_encode(Some(&mut auxed[..]), &lowered[..lowerlen]); + lower_encode(&mut lowered_dest, &dired[..dirlen][5..]); + let lowerlen = lowered_dest.len; + aux_encode(&mut auxed_dest, &lowered[..lowerlen]); + let auxlen = auxed_dest.len; hash_mangle(&auxed[..auxlen], &sha) } pub fn path_encode(path: &[u8]) -> Vec<u8> { let newlen = if path.len() <= MAXSTOREPATHLEN { - basic_encode(None, path) + 
let mut measure = Dest::create_measure(); + basic_encode(&mut measure, path); + measure.len } else { MAXSTOREPATHLEN + 1 }; @@ -625,7 +602,9 @@ path.to_vec() } else { let mut res = vec![0; newlen]; - basic_encode(Some(&mut res), path); + let mut dest = Dest::create(&mut res); + basic_encode(&mut dest, path); + assert!(dest.len == newlen); res } } else {