Mercurial > hg-stable
changeset 51113:687e192dae16
rust-matchers: fix quadratic complexity in `FileMatcher`
Concretely, this command:
```
$ echo hg up -r <nodeid>; time hg revert dir1 dir2 -r <othernode> --debug
hg up -r <nodeid>
real 0m14.690s
user 0m14.766s
sys 0m5.430s
```
was much slower despite using 16 cores before this change.
The approach taken here is the same one used in match.py, in exactmatcher.
This changeset was originally written by Valentin Gatien-Baron in a private
repository.
I have redacted the commit message and did a minor clean up of the code.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 18 Oct 2023 14:50:14 +0200 |
parents | 0250e45040f1 |
children | 042d32355a4a |
files | rust/hg-core/src/matchers.rs |
diffstat | 1 files changed, 31 insertions(+), 22 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/matchers.rs Fri Oct 27 08:54:41 2023 +0200 +++ b/rust/hg-core/src/matchers.rs Wed Oct 18 14:50:14 2023 +0200 @@ -7,6 +7,9 @@ //! Structs and types for matching files and directories. +use format_bytes::format_bytes; +use once_cell::sync::OnceCell; + use crate::{ dirstate::dirs_multiset::DirsChildrenMultiset, filepatterns::{ @@ -23,11 +26,11 @@ use crate::dirstate::status::IgnoreFnType; use crate::filepatterns::normalize_path_bytes; -use std::borrow::ToOwned; use std::collections::HashSet; use std::fmt::{Display, Error, Formatter}; use std::ops::Deref; use std::path::{Path, PathBuf}; +use std::{borrow::ToOwned, collections::BTreeSet}; #[derive(Debug, PartialEq)] pub enum VisitChildrenSet { @@ -173,6 +176,7 @@ pub struct FileMatcher { files: HashSet<HgPathBuf>, dirs: DirsMultiset, + sorted_visitchildrenset_candidates: OnceCell<BTreeSet<HgPathBuf>>, } impl FileMatcher { @@ -181,6 +185,7 @@ Ok(Self { files: HashSet::from_iter(files.into_iter()), dirs, + sorted_visitchildrenset_candidates: OnceCell::new(), }) } fn inner_matches(&self, filename: &HgPath) -> bool { @@ -199,30 +204,34 @@ self.inner_matches(filename) } fn visit_children_set(&self, directory: &HgPath) -> VisitChildrenSet { - if self.files.is_empty() || !self.dirs.contains(&directory) { + if self.files.is_empty() || !self.dirs.contains(directory) { return VisitChildrenSet::Empty; } - let mut candidates: HashSet<HgPathBuf> = - self.dirs.iter().cloned().collect(); - candidates.extend(self.files.iter().cloned()); - candidates.remove(HgPath::new(b"")); - - if !directory.as_ref().is_empty() { - let directory = [directory.as_ref().as_bytes(), b"/"].concat(); - candidates = candidates - .iter() - .filter_map(|c| { - if c.as_bytes().starts_with(&directory) { - Some(HgPathBuf::from_bytes( - &c.as_bytes()[directory.len()..], - )) - } else { - None - } - }) - .collect(); - } + let compute_candidates = || -> BTreeSet<HgPathBuf> { + let mut candidates: BTreeSet<HgPathBuf> = + self.dirs.iter().cloned().collect(); + candidates.extend(self.files.iter().cloned()); + candidates.remove(HgPath::new(b"")); + candidates + }; + let candidates = + if directory.as_ref().is_empty() { + compute_candidates() + } else { + let sorted_candidates = self + .sorted_visitchildrenset_candidates + .get_or_init(compute_candidates); + let directory_bytes = directory.as_ref().as_bytes(); + let start: HgPathBuf = + format_bytes!(b"{}/", directory_bytes).into(); + let start_len = start.len(); + // `0` sorts after `/` + let end = format_bytes!(b"{}0", directory_bytes).into(); + BTreeSet::from_iter(sorted_candidates.range(start..end).map( + |c| HgPathBuf::from_bytes(&c.as_bytes()[start_len..]), + )) + }; // `self.dirs` includes all of the directories, recursively, so if // we're attempting to match 'foo/bar/baz.txt', it'll have '', 'foo',