dirstate-tree: optimize HashMap lookups with raw_entry_mut
This switches to using `HashMap` from the hashbrown crate,
in order to use its `raw_entry_mut` method.
The standard library’s `HashMap` is also based on this same crate,
but `raw_entry_mut` is not yet stable there:
https://github.com/rust-lang/rust/issues/56167
Using version 0.9 because 0.10 is yanked and 0.11 requires Rust 1.49
This replaces in `DirstateMap::get_or_insert_node` a call to
`HashMap<K, V>::entry` with `K = WithBasename<Cow<'on_disk, HgPath>>`.
`entry` takes and consumes an "owned" `key: K` parameter, in case a new entry
ends up inserted. This key is converted by `to_cow` from a value that borrows
the `'path` lifetime.
When this function is called by `Dirstate::new_v1`, `'path` is in fact
the same as `'on_disk` so `to_cow` can return an owned key that contains
`Cow::Borrowed`.
For other callers, `to_cow` needs to create a `Cow::Owned` and thus make
a costly heap memory allocation. This is wasteful if this key was already
present in the map. Even when inserting a new node this is typically the case
for its ancestor nodes (assuming most directories have numerous descendants).
Differential Revision: https://phab.mercurial-scm.org/D12317
--- a/rust/Cargo.lock Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/Cargo.lock Tue Feb 08 15:51:52 2022 +0100
@@ -9,6 +9,12 @@
checksum = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
[[package]]
+name = "ahash"
+version = "0.4.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "739f4a8db6605981345c5654f3a85b056ce52f37a39d34da03f25bf2151ea16e"
+
+[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -372,6 +378,16 @@
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
+name = "hashbrown"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
+dependencies = [
+ "ahash",
+ "rayon",
+]
+
+[[package]]
name = "hermit-abi"
version = "0.1.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -398,6 +414,7 @@
"derive_more",
"flate2",
"format-bytes",
+ "hashbrown",
"home",
"im-rc",
"itertools",
--- a/rust/hg-core/Cargo.toml Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/hg-core/Cargo.toml Tue Feb 08 15:51:52 2022 +0100
@@ -13,6 +13,7 @@
bytes-cast = "0.2"
byteorder = "1.3.4"
derive_more = "0.99"
+hashbrown = {version = "0.9.1", features = ["rayon"]}
home = "0.5"
im-rc = "15.0.*"
itertools = "0.9"
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs Tue Feb 08 15:51:52 2022 +0100
@@ -22,7 +22,7 @@
use crate::DirstateParents;
use crate::DirstateStatus;
use crate::EntryState;
-use crate::FastHashMap;
+use crate::FastHashbrownMap as FastHashMap;
use crate::PatternFileWarning;
use crate::StatusError;
use crate::StatusOptions;
@@ -585,13 +585,11 @@
.next()
.expect("expected at least one inclusive ancestor");
loop {
- // TODO: can we avoid allocating an owned key in cases where the
- // map already contains that key, without introducing double
- // lookup?
- let child_node = child_nodes
+ let (_, child_node) = child_nodes
.make_mut(on_disk, unreachable_bytes)?
- .entry(to_cow(ancestor_path))
- .or_default();
+ .raw_entry_mut()
+ .from_key(ancestor_path.base_name())
+ .or_insert_with(|| (to_cow(ancestor_path), Node::default()));
if let Some(next) = inclusive_ancestor_paths.next() {
each_ancestor(child_node);
ancestor_path = next;
--- a/rust/hg-core/src/lib.rs Fri Mar 04 13:33:55 2022 +0100
+++ b/rust/hg-core/src/lib.rs Tue Feb 08 15:51:52 2022 +0100
@@ -56,6 +56,11 @@
/// write access to your repository, you have other issues.
pub type FastHashMap<K, V> = HashMap<K, V, RandomXxHashBuilder64>;
+// TODO: should this be the default `FastHashMap` for all of hg-core, not just
+// dirstate_tree? How does XxHash compare with AHash, hashbrown’s default?
+pub type FastHashbrownMap<K, V> =
+ hashbrown::HashMap<K, V, RandomXxHashBuilder64>;
+
#[derive(Debug, PartialEq)]
pub enum DirstateMapError {
PathNotFound(HgPathBuf),