Mercurial > hg
changeset 48224:6b5773f89183
rhg: faster hg cat when many files are requested
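With this patch I'm seeing a ~39ms improvement (220ms -> 181ms) when
running `hg cat` on ~220 files in a ~260k-file repo.
The requested files are now sorted once and merge-joined against the
manifest entries, instead of comparing every manifest entry against every
requested file in a nested loop.
The timing for `hg cat` on an individual file becomes slightly worse
(losing 5ms: 145ms -> 150ms).
A follow-up commit is intended to improve that.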
Differential Revision: https://phab.mercurial-scm.org/D11615
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 04 Oct 2021 19:06:45 +0100 |
parents | b4f83c9e7905 |
children | 0cc69017d47f |
files | rust/hg-core/src/operations/cat.rs rust/rhg/src/commands/cat.rs |
diffstat | 2 files changed, 20 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/operations/cat.rs Fri Jul 16 18:42:20 2021 +0200 +++ b/rust/hg-core/src/operations/cat.rs Mon Oct 04 19:06:45 2021 +0100 @@ -11,6 +11,9 @@ use crate::utils::hg_path::HgPathBuf; +use itertools::EitherOrBoth::{Both, Left, Right}; +use itertools::Itertools; + pub struct CatOutput { /// Whether any file in the manifest matched the paths given as CLI /// arguments @@ -31,7 +34,7 @@ pub fn cat<'a>( repo: &Repo, revset: &str, - files: &'a [HgPathBuf], + mut files: Vec<HgPathBuf>, ) -> Result<CatOutput, RevlogError> { let rev = crate::revset::resolve_single(revset, repo)?; let manifest = repo.manifest_for_rev(rev)?; @@ -40,13 +43,21 @@ .node_from_rev(rev) .expect("should succeed when repo.manifest did"); let mut bytes = vec![]; - let mut matched = vec![false; files.len()]; let mut found_any = false; + files.sort_unstable(); + + let mut missing = vec![]; - for (manifest_file, node_bytes) in manifest.files_with_nodes() { - for (cat_file, is_matched) in files.iter().zip(&mut matched) { - if cat_file.as_bytes() == manifest_file.as_bytes() { - *is_matched = true; + for entry in manifest + .files_with_nodes() + .merge_join_by(files.iter(), |(manifest_file, _), file| { + manifest_file.cmp(&file.as_ref()) + }) + { + match entry { + Left(_) => (), + Right(path) => missing.push(path), + Both((manifest_file, node_bytes), _) => { found_any = true; let file_log = repo.filelog(manifest_file)?; let file_node = Node::from_hex_for_repo(node_bytes)?; @@ -56,11 +67,9 @@ } } - let missing: Vec<_> = files + let missing: Vec<HgPathBuf> = missing .iter() - .zip(&matched) - .filter(|pair| !*pair.1) - .map(|pair| pair.0.clone()) + .map(|file| (*(file.as_ref())).to_owned()) .collect(); Ok(CatOutput { found_any,
--- a/rust/rhg/src/commands/cat.rs Fri Jul 16 18:42:20 2021 +0200 +++ b/rust/rhg/src/commands/cat.rs Mon Oct 04 19:06:45 2021 +0100 @@ -73,7 +73,7 @@ None => format!("{:x}", repo.dirstate_parents()?.p1), }; - let output = cat(&repo, &rev, &files).map_err(|e| (e, rev.as_str()))?; + let output = cat(&repo, &rev, files).map_err(|e| (e, rev.as_str()))?; invocation.ui.write_stdout(&output.concatenated)?; if !output.missing.is_empty() { let short = format!("{:x}", output.node.short()).into_bytes();