rhg: in path_encode, split Dest into VecDest and MeasureDest
authorArseniy Alekseyev <aalekseyev@janestreet.com>
Thu, 16 Feb 2023 19:00:56 +0000
changeset 50212 11661326b410
parent 50211 6baea276a985
child 50213 8e50aa0db347
rhg: in path_encode, split Dest into VecDest and MeasureDest Two separate types make the write semantics easier to understand because we can consider the two sinks separately. Having two independent compiled functions for size measurement and for actual encoding seems likely to improve performance, too. (and maybe we should get rid of measurement altogether) Getting rid of [Dest] also removes the ugly option rewrapping code, which is good.
rust/hg-core/src/revlog/path_encode.rs
--- a/rust/hg-core/src/revlog/path_encode.rs	Thu Feb 16 18:46:44 2023 +0000
+++ b/rust/hg-core/src/revlog/path_encode.rs	Thu Feb 16 19:00:56 2023 +0000
@@ -77,45 +77,44 @@
     }
 }
 
-struct Dest<'a> {
-    dest: Option<&'a mut [u8]>,
+struct VecDest {
+    buf: Vec<u8>,
+}
+
+struct MeasureDest {
     pub len: usize,
 }
 
-impl<'a> Dest<'a> {
-    pub fn create(buf: &'a mut [u8]) -> Dest<'a> {
-        Dest {
-            dest: Some(buf),
-            len: 0,
+impl VecDest {
+    pub fn create(capacity : usize) -> Self {
+        Self {
+            buf: Vec::with_capacity(capacity),
         }
     }
-
-    pub fn create_measure() -> Dest<'a> {
-        Dest { dest: None, len: 0 }
-    }
 }
 
-fn rewrap_option<'a, 'b: 'a>(
-    x: &'a mut Option<&'b mut [u8]>,
-) -> Option<&'a mut [u8]> {
-    match x {
-        None => None,
-        Some(y) => Some(y),
+impl Sink for VecDest {
+    fn write_byte(&mut self, c: u8) {
+        self.buf.push(c)
+    }
+
+    fn write_bytes(&mut self, src: &[u8]) {
+        self.buf.extend_from_slice(src)
     }
 }
 
-impl<'a> Sink for Dest<'a> {
-    fn write_byte(&mut self, c: u8) {
-        if let Some(slice) = rewrap_option(&mut self.dest) {
-            slice[self.len] = c
-        }
-        self.len += 1
+impl MeasureDest {
+    fn create() -> Self {
+        Self { len: 0 }
+    }
+}
+
+impl Sink for MeasureDest {
+    fn write_byte(&mut self, _c: u8) {
+        self.len += 1;
     }
 
     fn write_bytes(&mut self, src: &[u8]) {
-        if let Some(slice) = rewrap_option(&mut self.dest) {
-            slice[self.len..self.len + src.len()].copy_from_slice(src)
-        }
         self.len += src.len();
     }
 }
@@ -545,13 +544,13 @@
         src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s)
     };
 
-    let mut dest : DestArr<MAXSTOREPATHLEN> = DestArr::create();
+    let mut dest : VecDest = VecDest::create(MAXSTOREPATHLEN);
     dest.write_bytes(b"dh/");
 
     if let Some(last_slash) = last_slash {
         for slice in src[..last_slash].split(|b| *b == b'/') {
             let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)];
-            if dest.len + slice.len() > maxshortdirslen + 3 {
+            if dest.buf.len() + slice.len() > maxshortdirslen + 3 {
                 break;
             } else {
                 dest.write_bytes(slice);
@@ -560,7 +559,7 @@
         }
     }
 
-    let used = dest.len + 40 + {
+    let used = dest.buf.len() + 40 + {
         if let Some(l) = last_dot {
             src.len() - l
         } else {
@@ -589,7 +588,7 @@
     if let Some(l) = last_dot {
         dest.write_bytes(&src[l..]);
     }
-    dest.contents().to_vec()
+    dest.buf
 }
 
 fn hash_encode(src: &[u8]) -> Vec<u8> {
@@ -609,7 +608,7 @@
 
 pub fn path_encode(path: &[u8]) -> Vec<u8> {
     let newlen = if path.len() <= MAXSTOREPATHLEN {
-        let mut measure = Dest::create_measure();
+        let mut measure = MeasureDest::create();
         basic_encode(&mut measure, path);
         measure.len
     } else {
@@ -619,11 +618,10 @@
         if newlen == path.len() {
             path.to_vec()
         } else {
-            let mut res = vec![0; newlen];
-            let mut dest = Dest::create(&mut res);
+            let mut dest = VecDest::create(newlen);
             basic_encode(&mut dest, path);
-            assert!(dest.len == newlen);
-            res
+            assert!(dest.buf.len() == newlen);
+            dest.buf
         }
     } else {
         hash_encode(path)