changeset 52284:f4aede0f01af

rust-manifest: use `memchr` crate for all byte-finding needs While writing a very dumb manifest diffing algorithm for a proof-of-concept I saw that `Manifest::find_by_path` was much slower than I was expecting. It turns out that the Rust stdlib uses slow (all is relative) code when searching for byte positions for reasons ranging from portability, SIMD API stability, nobody doing the work, etc. `memchr` is much faster for these purposes, so let's use it. I was measuring ~670ms of profile time in `find_by_path`; after this patch it went down to ~230ms.
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 12 Nov 2024 23:20:04 +0100
parents fad30cb98579
children 94cf83d9a2c9
files rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/utils.rs
diffstat 4 files changed, 6 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/rust/Cargo.lock	Wed Oct 30 19:38:56 2024 -0700
+++ b/rust/Cargo.lock	Tue Nov 12 23:20:04 2024 +0100
@@ -652,6 +652,7 @@
  "libc",
  "log",
  "logging_timer",
+ "memchr",
  "memmap2",
  "once_cell",
  "pretty_assertions",
--- a/rust/hg-core/Cargo.toml	Wed Oct 30 19:38:56 2024 -0700
+++ b/rust/hg-core/Cargo.toml	Tue Nov 12 23:20:04 2024 +0100
@@ -22,6 +22,7 @@
 lazy_static = "1.4.0"
 libc = "0.2.137"
 logging_timer = "1.1.0"
+memchr = "2"
 rand = "0.8.5"
 rand_pcg = "0.3.1"
 rand_distr = "0.4.3"
--- a/rust/hg-core/src/revlog/manifest.rs	Wed Oct 30 19:38:56 2024 -0700
+++ b/rust/hg-core/src/revlog/manifest.rs	Tue Nov 12 23:20:04 2024 +0100
@@ -145,12 +145,11 @@
             let middle = bytes.len() / 2;
             // Integer division rounds down, so `middle < len`.
             let (before, after) = bytes.split_at(middle);
-            let is_newline = |&byte: &u8| byte == b'\n';
-            let entry_start = match before.iter().rposition(is_newline) {
+            let entry_start = match memchr::memrchr(b'\n', before) {
                 Some(i) => i + 1,
                 None => 0, // We choose the first entry in `bytes`
             };
-            let entry_end = match after.iter().position(is_newline) {
+            let entry_end = match memchr::memchr(b'\n', after) {
                 Some(i) => {
                     // No `+ 1` here to exclude this newline from the range
                     middle + i
--- a/rust/hg-core/src/utils.rs	Wed Oct 30 19:38:56 2024 -0700
+++ b/rust/hg-core/src/utils.rs	Tue Nov 12 23:20:04 2024 +0100
@@ -134,10 +134,8 @@
     }
 
     fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> {
-        let mut iter = self.splitn(2, |&byte| byte == separator);
-        let a = iter.next()?;
-        let b = iter.next()?;
-        Some((a, b))
+        let pos = memchr::memchr(separator, self)?;
+        Some((&self[..pos], &self[pos + 1..]))
     }
 
     fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {