changeset 50212:11661326b410

rhg: in path_encode, split Dest into VecDest and MeasureDest Two separate types make the write semantics easier to understand because we can consider the two sinks separately. Having two independent compiled functions for size measurement and for actual encoding seems likely to improve performance, too. (and maybe we should get rid of measurement altogether) Getting rid of [Dest] also removes the ugly option rewrapping code, which is good.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Thu, 16 Feb 2023 19:00:56 +0000
parents 6baea276a985
children 8e50aa0db347
files rust/hg-core/src/revlog/path_encode.rs
diffstat 1 files changed, 33 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/path_encode.rs	Thu Feb 16 18:46:44 2023 +0000
+++ b/rust/hg-core/src/revlog/path_encode.rs	Thu Feb 16 19:00:56 2023 +0000
@@ -77,45 +77,44 @@
     }
 }
 
-struct Dest<'a> {
-    dest: Option<&'a mut [u8]>,
+struct VecDest {
+    buf: Vec<u8>,
+}
+
+struct MeasureDest {
     pub len: usize,
 }
 
-impl<'a> Dest<'a> {
-    pub fn create(buf: &'a mut [u8]) -> Dest<'a> {
-        Dest {
-            dest: Some(buf),
-            len: 0,
+impl VecDest {
+    pub fn create(capacity : usize) -> Self {
+        Self {
+            buf: Vec::with_capacity(capacity),
         }
     }
-
-    pub fn create_measure() -> Dest<'a> {
-        Dest { dest: None, len: 0 }
-    }
 }
 
-fn rewrap_option<'a, 'b: 'a>(
-    x: &'a mut Option<&'b mut [u8]>,
-) -> Option<&'a mut [u8]> {
-    match x {
-        None => None,
-        Some(y) => Some(y),
+impl Sink for VecDest {
+    fn write_byte(&mut self, c: u8) {
+        self.buf.push(c)
+    }
+
+    fn write_bytes(&mut self, src: &[u8]) {
+        self.buf.extend_from_slice(src)
     }
 }
 
-impl<'a> Sink for Dest<'a> {
-    fn write_byte(&mut self, c: u8) {
-        if let Some(slice) = rewrap_option(&mut self.dest) {
-            slice[self.len] = c
-        }
-        self.len += 1
+impl MeasureDest {
+    fn create() -> Self {
+        Self { len: 0 }
+    }
+}
+
+impl Sink for MeasureDest {
+    fn write_byte(&mut self, _c: u8) {
+        self.len += 1;
     }
 
     fn write_bytes(&mut self, src: &[u8]) {
-        if let Some(slice) = rewrap_option(&mut self.dest) {
-            slice[self.len..self.len + src.len()].copy_from_slice(src)
-        }
         self.len += src.len();
     }
 }
@@ -545,13 +544,13 @@
         src[s..].iter().rposition(|b| *b == b'.').map(|i| i + s)
     };
 
-    let mut dest : DestArr<MAXSTOREPATHLEN> = DestArr::create();
+    let mut dest : VecDest = VecDest::create(MAXSTOREPATHLEN);
     dest.write_bytes(b"dh/");
 
     if let Some(last_slash) = last_slash {
         for slice in src[..last_slash].split(|b| *b == b'/') {
             let slice = &slice[..std::cmp::min(slice.len(), dirprefixlen)];
-            if dest.len + slice.len() > maxshortdirslen + 3 {
+            if dest.buf.len() + slice.len() > maxshortdirslen + 3 {
                 break;
             } else {
                 dest.write_bytes(slice);
@@ -560,7 +559,7 @@
         }
     }
 
-    let used = dest.len + 40 + {
+    let used = dest.buf.len() + 40 + {
         if let Some(l) = last_dot {
             src.len() - l
         } else {
@@ -589,7 +588,7 @@
     if let Some(l) = last_dot {
         dest.write_bytes(&src[l..]);
     }
-    dest.contents().to_vec()
+    dest.buf
 }
 
 fn hash_encode(src: &[u8]) -> Vec<u8> {
@@ -609,7 +608,7 @@
 
 pub fn path_encode(path: &[u8]) -> Vec<u8> {
     let newlen = if path.len() <= MAXSTOREPATHLEN {
-        let mut measure = Dest::create_measure();
+        let mut measure = MeasureDest::create();
         basic_encode(&mut measure, path);
         measure.len
     } else {
@@ -619,11 +618,10 @@
         if newlen == path.len() {
             path.to_vec()
         } else {
-            let mut res = vec![0; newlen];
-            let mut dest = Dest::create(&mut res);
+            let mut dest = VecDest::create(newlen);
             basic_encode(&mut dest, path);
-            assert!(dest.len == newlen);
-            res
+            assert!(dest.buf.len() == newlen);
+            dest.buf
         }
     } else {
         hash_encode(path)