changeset 50159:96d31efd21f7

rhg: reduce verbosity in path_encode by using a trait for writing Hopefully this makes the code easier to read and understand and shorter overall. It also lets us later tweak the type we use as a [Sink], without having to change the encoding functions, including using two different types for size measurement and for the actual serialization.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Thu, 16 Feb 2023 18:29:52 +0000
parents 0d2ec486d95c
children 5ce53ff6133a
files rust/hg-core/src/revlog/path_encode.rs
diffstat 1 files changed, 133 insertions(+), 154 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/path_encode.rs	Thu Feb 16 16:20:17 2023 +0000
+++ b/rust/hg-core/src/revlog/path_encode.rs	Thu Feb 16 18:29:52 2023 +0000
@@ -36,22 +36,31 @@
     DDEFAULT,
 }
 
+trait Sink {
+    fn write_byte(&mut self, c: u8);
+    fn write_bytes(&mut self, c: &[u8]);
+}
+
 fn inset(bitset: &[u32; 8], c: u8) -> bool {
     bitset[(c as usize) >> 5] & (1 << (c & 31)) != 0
 }
 
-fn charcopy(dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
-    if let Some(slice) = dest {
-        slice[*destlen] = c
-    }
-    *destlen += 1
+struct Dest<'a> {
+    dest: Option<&'a mut [u8]>,
+    pub len: usize,
 }
 
-fn memcopy(dest: Option<&mut [u8]>, destlen: &mut usize, src: &[u8]) {
-    if let Some(slice) = dest {
-        slice[*destlen..*destlen + src.len()].copy_from_slice(src)
+impl<'a> Dest<'a> {
+    pub fn create(buf: &'a mut [u8]) -> Dest<'a> {
+        Dest {
+            dest: Some(buf),
+            len: 0,
+        }
     }
-    *destlen += src.len();
+
+    pub fn create_measure() -> Dest<'a> {
+        Dest { dest: None, len: 0 }
+    }
 }
 
 fn rewrap_option<'a, 'b: 'a>(
@@ -63,38 +72,49 @@
     }
 }
 
-fn hexencode(mut dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
+impl<'a> Sink for Dest<'a> {
+    fn write_byte(&mut self, c: u8) {
+        if let Some(slice) = rewrap_option(&mut self.dest) {
+            slice[self.len] = c
+        }
+        self.len += 1
+    }
+
+    fn write_bytes(&mut self, src: &[u8]) {
+        if let Some(slice) = rewrap_option(&mut self.dest) {
+            slice[self.len..self.len + src.len()].copy_from_slice(src)
+        }
+        self.len += src.len();
+    }
+}
+
+fn hexencode(dest: &mut impl Sink, c: u8) {
     let hexdigit = b"0123456789abcdef";
-    charcopy(
-        rewrap_option(&mut dest),
-        destlen,
-        hexdigit[(c as usize) >> 4],
-    );
-    charcopy(dest, destlen, hexdigit[(c as usize) & 15]);
+    dest.write_byte(hexdigit[(c as usize) >> 4]);
+    dest.write_byte(hexdigit[(c as usize) & 15]);
 }
 
 /* 3-byte escape: tilde followed by two hex digits */
-fn escape3(mut dest: Option<&mut [u8]>, destlen: &mut usize, c: u8) {
-    charcopy(rewrap_option(&mut dest), destlen, b'~');
-    hexencode(dest, destlen, c);
+fn escape3(dest: &mut impl Sink, c: u8) {
+    dest.write_byte(b'~');
+    hexencode(dest, c);
 }
 
-fn encode_dir(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize {
+fn encode_dir(dest: &mut impl Sink, src: &[u8]) {
     let mut state = dir_state::DDEFAULT;
     let mut i = 0;
-    let mut destlen = 0;
 
     while i < src.len() {
         match state {
             dir_state::DDOT => match src[i] {
                 b'd' | b'i' => {
                     state = dir_state::DHGDI;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'h' => {
                     state = dir_state::DH;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 _ => {
@@ -104,7 +124,7 @@
             dir_state::DH => {
                 if src[i] == b'g' {
                     state = dir_state::DHGDI;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 } else {
                     state = dir_state::DDEFAULT;
@@ -112,8 +132,8 @@
             }
             dir_state::DHGDI => {
                 if src[i] == b'/' {
-                    memcopy(rewrap_option(&mut dest), &mut destlen, b".hg");
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_bytes(b".hg");
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 state = dir_state::DDEFAULT;
@@ -122,66 +142,64 @@
                 if src[i] == b'.' {
                     state = dir_state::DDOT
                 }
-                charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                dest.write_byte(src[i]);
                 i += 1;
             }
         }
     }
-    destlen
 }
 
 fn _encode(
     twobytes: &[u32; 8],
     onebyte: &[u32; 8],
-    mut dest: Option<&mut [u8]>,
+    dest: &mut impl Sink,
     src: &[u8],
     encodedir: bool,
-) -> usize {
+) {
     let mut state = path_state::START;
     let mut i = 0;
-    let mut destlen = 0;
     let len = src.len();
 
     while i < len {
         match state {
             path_state::START => match src[i] {
                 b'/' => {
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'.' => {
                     state = path_state::LDOT;
-                    escape3(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    escape3(dest, src[i]);
                     i += 1;
                 }
                 b' ' => {
                     state = path_state::DEFAULT;
-                    escape3(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    escape3(dest, src[i]);
                     i += 1;
                 }
                 b'a' => {
                     state = path_state::A;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'c' => {
                     state = path_state::C;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'l' => {
                     state = path_state::L;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'n' => {
                     state = path_state::N;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'p' => {
                     state = path_state::P;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 _ => {
@@ -191,7 +209,7 @@
             path_state::A => {
                 if src[i] == b'u' {
                     state = path_state::AU;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 } else {
                     state = path_state::DEFAULT;
@@ -208,18 +226,14 @@
             path_state::THIRD => {
                 state = path_state::DEFAULT;
                 match src[i] {
-                    b'.' | b'/' | b'\0' => escape3(
-                        rewrap_option(&mut dest),
-                        &mut destlen,
-                        src[i - 1],
-                    ),
+                    b'.' | b'/' | b'\0' => escape3(dest, src[i - 1]),
                     _ => i -= 1,
                 }
             }
             path_state::C => {
                 if src[i] == b'o' {
                     state = path_state::CO;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 } else {
                     state = path_state::DEFAULT;
@@ -242,41 +256,25 @@
                     i += 1;
                 } else {
                     state = path_state::DEFAULT;
-                    charcopy(
-                        rewrap_option(&mut dest),
-                        &mut destlen,
-                        src[i - 1],
-                    );
+                    dest.write_byte(src[i - 1]);
                 }
             }
             path_state::COMLPTn => {
                 state = path_state::DEFAULT;
                 match src[i] {
                     b'.' | b'/' | b'\0' => {
-                        escape3(
-                            rewrap_option(&mut dest),
-                            &mut destlen,
-                            src[i - 2],
-                        );
-                        charcopy(
-                            rewrap_option(&mut dest),
-                            &mut destlen,
-                            src[i - 1],
-                        );
+                        escape3(dest, src[i - 2]);
+                        dest.write_byte(src[i - 1]);
                     }
                     _ => {
-                        memcopy(
-                            rewrap_option(&mut dest),
-                            &mut destlen,
-                            &src[i - 2..i],
-                        );
+                        dest.write_bytes(&src[i - 2..i]);
                     }
                 }
             }
             path_state::L => {
                 if src[i] == b'p' {
                     state = path_state::LP;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 } else {
                     state = path_state::DEFAULT;
@@ -293,7 +291,7 @@
             path_state::N => {
                 if src[i] == b'u' {
                     state = path_state::NU;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 } else {
                     state = path_state::DEFAULT;
@@ -310,7 +308,7 @@
             path_state::P => {
                 if src[i] == b'r' {
                     state = path_state::PR;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 } else {
                     state = path_state::DEFAULT;
@@ -327,12 +325,12 @@
             path_state::LDOT => match src[i] {
                 b'd' | b'i' => {
                     state = path_state::HGDI;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'h' => {
                     state = path_state::H;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 _ => {
@@ -342,30 +340,30 @@
             path_state::DOT => match src[i] {
                 b'/' | b'\0' => {
                     state = path_state::START;
-                    memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e");
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_bytes(b"~2e");
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'd' | b'i' => {
                     state = path_state::HGDI;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, b'.');
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(b'.');
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 b'h' => {
                     state = path_state::H;
-                    memcopy(rewrap_option(&mut dest), &mut destlen, b".h");
+                    dest.write_bytes(b".h");
                     i += 1;
                 }
                 _ => {
                     state = path_state::DEFAULT;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, b'.');
+                    dest.write_byte(b'.');
                 }
             },
             path_state::H => {
                 if src[i] == b'g' {
                     state = path_state::HGDI;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 } else {
                     state = path_state::DEFAULT;
@@ -375,13 +373,9 @@
                 if src[i] == b'/' {
                     state = path_state::START;
                     if encodedir {
-                        memcopy(
-                            rewrap_option(&mut dest),
-                            &mut destlen,
-                            b".hg",
-                        );
+                        dest.write_bytes(b".hg");
                     }
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1
                 } else {
                     state = path_state::DEFAULT;
@@ -390,18 +384,18 @@
             path_state::SPACE => match src[i] {
                 b'/' | b'\0' => {
                     state = path_state::START;
-                    memcopy(rewrap_option(&mut dest), &mut destlen, b"~20");
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_bytes(b"~20");
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 _ => {
                     state = path_state::DEFAULT;
-                    charcopy(rewrap_option(&mut dest), &mut destlen, b' ');
+                    dest.write_byte(b' ');
                 }
             },
             path_state::DEFAULT => {
                 while i != len && inset(onebyte, src[i]) {
-                    charcopy(rewrap_option(&mut dest), &mut destlen, src[i]);
+                    dest.write_byte(src[i]);
                     i += 1;
                 }
                 if i == len {
@@ -418,17 +412,13 @@
                     }
                     b'/' => {
                         state = path_state::START;
-                        charcopy(rewrap_option(&mut dest), &mut destlen, b'/');
+                        dest.write_byte(b'/');
                         i += 1;
                     }
                     _ => {
                         if inset(onebyte, src[i]) {
                             loop {
-                                charcopy(
-                                    rewrap_option(&mut dest),
-                                    &mut destlen,
-                                    src[i],
-                                );
+                                dest.write_byte(src[i]);
                                 i += 1;
                                 if !(i < len && inset(onebyte, src[i])) {
                                     break;
@@ -437,22 +427,14 @@
                         } else if inset(twobytes, src[i]) {
                             let c = src[i];
                             i += 1;
-                            charcopy(
-                                rewrap_option(&mut dest),
-                                &mut destlen,
-                                b'_',
-                            );
-                            charcopy(
-                                rewrap_option(&mut dest),
-                                &mut destlen,
-                                if c == b'_' { b'_' } else { c + 32 },
-                            );
+                            dest.write_byte(b'_');
+                            dest.write_byte(if c == b'_' {
+                                b'_'
+                            } else {
+                                c + 32
+                            });
                         } else {
-                            escape3(
-                                rewrap_option(&mut dest),
-                                &mut destlen,
-                                src[i],
-                            );
+                            escape3(dest, src[i]);
                             i += 1;
                         }
                     }
@@ -464,17 +446,13 @@
         path_state::START => (),
         path_state::A => (),
         path_state::AU => (),
-        path_state::THIRD => {
-            escape3(rewrap_option(&mut dest), &mut destlen, src[i - 1])
-        }
+        path_state::THIRD => escape3(dest, src[i - 1]),
         path_state::C => (),
         path_state::CO => (),
-        path_state::COMLPT => {
-            charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1])
-        }
+        path_state::COMLPT => dest.write_byte(src[i - 1]),
         path_state::COMLPTn => {
-            escape3(rewrap_option(&mut dest), &mut destlen, src[i - 2]);
-            charcopy(rewrap_option(&mut dest), &mut destlen, src[i - 1]);
+            escape3(dest, src[i - 2]);
+            dest.write_byte(src[i - 1]);
         }
         path_state::L => (),
         path_state::LP => (),
@@ -484,19 +462,18 @@
         path_state::PR => (),
         path_state::LDOT => (),
         path_state::DOT => {
-            memcopy(rewrap_option(&mut dest), &mut destlen, b"~2e");
+            dest.write_bytes(b"~2e");
         }
         path_state::H => (),
         path_state::HGDI => (),
         path_state::SPACE => {
-            memcopy(rewrap_option(&mut dest), &mut destlen, b"~20");
+            dest.write_bytes(b"~20");
         }
         path_state::DEFAULT => (),
-    };
-    destlen
+    }
 }
 
-fn basic_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize {
+fn basic_encode(dest: &mut impl Sink, src: &[u8]) {
     let twobytes: [u32; 8] = [0, 0, 0x87ff_fffe, 0, 0, 0, 0, 0];
     let onebyte: [u32; 8] =
         [1, 0x2bff_3bfa, 0x6800_0001, 0x2fff_ffff, 0, 0, 0, 0];
@@ -505,24 +482,22 @@
 
 const MAXSTOREPATHLEN: usize = 120;
 
-fn lower_encode(mut dest: Option<&mut [u8]>, src: &[u8]) -> usize {
+fn lower_encode(dest: &mut impl Sink, src: &[u8]) {
     let onebyte: [u32; 8] =
         [1, 0x2bff_fbfb, 0xe800_0001, 0x2fff_ffff, 0, 0, 0, 0];
     let lower: [u32; 8] = [0, 0, 0x07ff_fffe, 0, 0, 0, 0, 0];
-    let mut destlen = 0;
     for c in src {
         if inset(&onebyte, *c) {
-            charcopy(rewrap_option(&mut dest), &mut destlen, *c)
+            dest.write_byte(*c)
         } else if inset(&lower, *c) {
-            charcopy(rewrap_option(&mut dest), &mut destlen, *c + 32)
+            dest.write_byte(*c + 32)
         } else {
-            escape3(rewrap_option(&mut dest), &mut destlen, *c)
+            escape3(dest, *c)
         }
     }
-    destlen
 }
 
-fn aux_encode(dest: Option<&mut [u8]>, src: &[u8]) -> usize {
+fn aux_encode(dest: &mut impl Sink, src: &[u8]) {
     let twobytes = [0; 8];
     let onebyte: [u32; 8] = [!0, 0xffff_3ffe, !0, !0, !0, !0, !0, !0];
     _encode(&twobytes, &onebyte, dest, src, false)
@@ -531,7 +506,6 @@
 fn hash_mangle(src: &[u8], sha: &[u8]) -> Vec<u8> {
     let dirprefixlen = 8;
     let maxshortdirslen = 68;
-    let mut destlen = 0;
 
     let last_slash = src.iter().rposition(|b| *b == b'/');
     let last_dot: Option<usize> = {
@@ -539,25 +513,23 @@
         src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s)
     };
 
-    let mut dest = vec![0; MAXSTOREPATHLEN];
-    memcopy(Some(&mut dest), &mut destlen, b"dh/");
+    let mut dest_vec = vec![0; MAXSTOREPATHLEN];
+    let mut dest = Dest::create(&mut dest_vec);
+    dest.write_bytes(b"dh/");
 
     if let Some(last_slash) = last_slash {
         for slice in src[..last_slash].split(|b| *b == b'/') {
             let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)];
-            if destlen + slice.len() > maxshortdirslen + 3 {
+            if dest.len + slice.len() > maxshortdirslen + 3 {
                 break;
             } else {
-                memcopy(Some(&mut dest), &mut destlen, slice);
-                if dest[destlen - 1] == b'.' || dest[destlen - 1] == b' ' {
-                    dest[destlen - 1] = b'_'
-                }
+                dest.write_bytes(slice);
             }
-            charcopy(Some(&mut dest), &mut destlen, b'/');
+            dest.write_byte(b'/');
         }
     }
 
-    let used = destlen + 40 + {
+    let used = dest.len + 40 + {
         if let Some(l) = last_dot {
             src.len() - l
         } else {
@@ -577,46 +549,51 @@
                 Some(l) => l + 1,
                 None => 0,
             };
-            memcopy(
-                Some(&mut dest),
-                &mut destlen,
-                &src[start..][..basenamelen],
-            )
+            dest.write_bytes(&src[start..][..basenamelen])
         }
     }
     for c in sha {
-        hexencode(Some(&mut dest), &mut destlen, *c);
+        hexencode(&mut dest, *c);
     }
     if let Some(l) = last_dot {
-        memcopy(Some(&mut dest), &mut destlen, &src[l..]);
+        dest.write_bytes(&src[l..]);
     }
-    if destlen == dest.len() {
-        dest
+    let destlen = dest.len;
+    if destlen == dest_vec.len() {
+        dest_vec
     } else {
         // sometimes the path are shorter than MAXSTOREPATHLEN
-        dest[..destlen].to_vec()
+        dest_vec[..destlen].to_vec()
     }
 }
 
 const MAXENCODE: usize = 4096 * 4;
 fn hash_encode(src: &[u8]) -> Vec<u8> {
     let dired = &mut [0; MAXENCODE];
+    let mut dired_dest = Dest::create(dired);
     let lowered = &mut [0; MAXENCODE];
+    let mut lowered_dest = Dest::create(lowered);
     let auxed = &mut [0; MAXENCODE];
+    let mut auxed_dest = Dest::create(auxed);
     let baselen = (src.len() - 5) * 3;
     if baselen >= MAXENCODE {
         panic!("path_encode::hash_encore: string too long: {}", baselen)
     };
-    let dirlen = encode_dir(Some(&mut dired[..]), src);
+    encode_dir(&mut dired_dest, src);
+    let dirlen = dired_dest.len;
     let sha = Sha1::digest(&dired[..dirlen]);
-    let lowerlen = lower_encode(Some(&mut lowered[..]), &dired[..dirlen][5..]);
-    let auxlen = aux_encode(Some(&mut auxed[..]), &lowered[..lowerlen]);
+    lower_encode(&mut lowered_dest, &dired[..dirlen][5..]);
+    let lowerlen = lowered_dest.len;
+    aux_encode(&mut auxed_dest, &lowered[..lowerlen]);
+    let auxlen = auxed_dest.len;
     hash_mangle(&auxed[..auxlen], &sha)
 }
 
 pub fn path_encode(path: &[u8]) -> Vec<u8> {
     let newlen = if path.len() <= MAXSTOREPATHLEN {
-        basic_encode(None, path)
+        let mut measure = Dest::create_measure();
+        basic_encode(&mut measure, path);
+        measure.len
     } else {
         MAXSTOREPATHLEN + 1
     };
@@ -625,7 +602,9 @@
             path.to_vec()
         } else {
             let mut res = vec![0; newlen];
-            basic_encode(Some(&mut res), path);
+            let mut dest = Dest::create(&mut res);
+            basic_encode(&mut dest, path);
+            assert!(dest.len == newlen);
             res
         }
     } else {