Mercurial > hg
changeset 48950:11c0411bf4e2
dirstate-tree: optimize HashMap lookups with raw_entry_mut
This switches to using `HashMap` from the hashbrown crate,
in order to use its `raw_entry_mut` method.
The standard library’s `HashMap` is also based on this same crate,
but `raw_entry_mut` is not yet stable there:
https://github.com/rust-lang/rust/issues/56167
Using version 0.9 because 0.10 is yanked and 0.11 requires Rust 1.49
This replaces in `DirstateMap::get_or_insert_node` a call to
`HashMap<K, V>::entry` with `K = WithBasename<Cow<'on_disk, HgPath>>`.
`entry` takes and consumes an "owned" `key: K` parameter, in case a new entry
ends up inserted. This key is converted by `to_cow` from a value that borrows
the `'path` lifetime.
When this function is called by `Dirstate::new_v1`, `'path` is in fact
the same as `'on_disk` so `to_cow` can return an owned key that contains
`Cow::Borrowed`.
For other callers, `to_cow` needs to create a `Cow::Owned` and thus make
a costly heap memory allocation. This is wasteful if this key was already
present in the map. Even when inserting a new node this is typically the case
for its ancestor nodes (assuming most directories have numerous descendants).
Differential Revision: https://phab.mercurial-scm.org/D12317
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Tue, 08 Feb 2022 15:51:52 +0100 |
parents | 469b9ee336a6 |
children | cf99c4af1079 |
files | rust/Cargo.lock rust/hg-core/Cargo.toml rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-core/src/lib.rs |
diffstat | 4 files changed, 28 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/Cargo.lock Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/Cargo.lock Tue Feb 08 15:51:52 2022 +0100 @@ -9,6 +9,12 @@ checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e" [[package]] +name = "ahash" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e" + +[[package]] name = "aho-corasick" version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -372,6 +378,16 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" [[package]] +name = "hashbrown" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04" +dependencies = [ + "ahash", + "rayon", +] + +[[package]] name = "hermit-abi" version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -398,6 +414,7 @@ "derive_more", "flate2", "format-bytes", + "hashbrown", "home", "im-rc", "itertools",
--- a/rust/hg-core/Cargo.toml Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/hg-core/Cargo.toml Tue Feb 08 15:51:52 2022 +0100 @@ -13,6 +13,7 @@ bytes-cast = "0.2" byteorder = "1.3.4" derive_more = "0.99" +hashbrown = {version = "0.9.1", features = ["rayon"]} home = "0.5" im-rc = "15.0.*" itertools = "0.9"
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Feb 08 15:51:52 2022 +0100 @@ -22,7 +22,7 @@ use crate::DirstateParents; use crate::DirstateStatus; use crate::EntryState; -use crate::FastHashMap; +use crate::FastHashbrownMap as FastHashMap; use crate::PatternFileWarning; use crate::StatusError; use crate::StatusOptions; @@ -585,13 +585,11 @@ .next() .expect("expected at least one inclusive ancestor"); loop { - // TODO: can we avoid allocating an owned key in cases where the - // map already contains that key, without introducing double - // lookup? - let child_node = child_nodes + let (_, child_node) = child_nodes .make_mut(on_disk, unreachable_bytes)? - .entry(to_cow(ancestor_path)) - .or_default(); + .raw_entry_mut() + .from_key(ancestor_path.base_name()) + .or_insert_with(|| (to_cow(ancestor_path), Node::default())); if let Some(next) = inclusive_ancestor_paths.next() { each_ancestor(child_node); ancestor_path = next;
--- a/rust/hg-core/src/lib.rs Fri Mar 04 13:33:55 2022 +0100 +++ b/rust/hg-core/src/lib.rs Tue Feb 08 15:51:52 2022 +0100 @@ -56,6 +56,11 @@ /// write access to your repository, you have other issues. pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>; +// TODO: should this be the default `FastHashMap` for all of hg-core, not just +// dirstate_tree? How does XxHash compare with AHash, hashbrown’s default? +pub type FastHashbrownMap<K, V> = + hashbrown::HashMap<K, V, RandomXxHashBuilder64>; + #[derive(Debug, PartialEq)] pub enum DirstateMapError { PathNotFound(HgPathBuf),