Mercurial > hg
changeset 52284:f4aede0f01af
rust-manifest: use `memchr` crate for all byte-finding needs
While writing a very dumb manifest diffing algorithm for a proof-of-concept,
I saw that `Manifest::find_by_path` was much slower than I was expecting.
It turns out that the Rust stdlib uses slow (all is relative) code when
searching for byte positions for reasons ranging from portability, SIMD
API stability, nobody doing the work, etc. `memchr` is much faster for these
purposes, so let's use it.
I was measuring ~670ms of profile time in `find_by_path`, after this patch
it went down to ~230ms.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Tue, 12 Nov 2024 23:20:04 +0100 |
parents | fad30cb98579 |
children | 94cf83d9a2c9 |
files | rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/revlog/manifest.rs rust/hg-core/src/utils.rs |
diffstat | 4 files changed, 6 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/Cargo.lock Wed Oct 30 19:38:56 2024 -0700 +++ b/rust/Cargo.lock Tue Nov 12 23:20:04 2024 +0100 @@ -652,6 +652,7 @@ "libc", "log", "logging_timer", + "memchr", "memmap2", "once_cell", "pretty_assertions",
--- a/rust/hg-core/Cargo.toml Wed Oct 30 19:38:56 2024 -0700 +++ b/rust/hg-core/Cargo.toml Tue Nov 12 23:20:04 2024 +0100 @@ -22,6 +22,7 @@ lazy_static = "1.4.0" libc = "0.2.137" logging_timer = "1.1.0" +memchr = "2" rand = "0.8.5" rand_pcg = "0.3.1" rand_distr = "0.4.3"
--- a/rust/hg-core/src/revlog/manifest.rs Wed Oct 30 19:38:56 2024 -0700 +++ b/rust/hg-core/src/revlog/manifest.rs Tue Nov 12 23:20:04 2024 +0100 @@ -145,12 +145,11 @@ let middle = bytes.len() / 2; // Integer division rounds down, so `middle < len`. let (before, after) = bytes.split_at(middle); - let is_newline = |&byte: &u8| byte == b'\n'; - let entry_start = match before.iter().rposition(is_newline) { + let entry_start = match memchr::memrchr(b'\n', before) { Some(i) => i + 1, None => 0, // We choose the first entry in `bytes` }; - let entry_end = match after.iter().position(is_newline) { + let entry_end = match memchr::memchr(b'\n', after) { Some(i) => { // No `+ 1` here to exclude this newline from the range middle + i
--- a/rust/hg-core/src/utils.rs Wed Oct 30 19:38:56 2024 -0700 +++ b/rust/hg-core/src/utils.rs Tue Nov 12 23:20:04 2024 +0100 @@ -134,10 +134,8 @@ } fn split_2(&self, separator: u8) -> Option<(&[u8], &[u8])> { - let mut iter = self.splitn(2, |&byte| byte == separator); - let a = iter.next()?; - let b = iter.next()?; - Some((a, b)) + let pos = memchr::memchr(separator, self)?; + Some((&self[..pos], &self[pos + 1..])) } fn split_2_by_slice(&self, separator: &[u8]) -> Option<(&[u8], &[u8])> {