copies-rust: extract generic map merge logic from merge_copies_dict
This deduplicates the copy-tracing-specific logic
Differential Revision: https://phab.mercurial-scm.org/D9682
--- a/rust/hg-core/src/copy_tracing.rs Mon Dec 21 12:34:59 2020 +0100
+++ b/rust/hg-core/src/copy_tracing.rs Wed Dec 23 11:48:16 2020 +0100
@@ -3,7 +3,6 @@
use crate::Revision;
use crate::NULL_REVISION;
-use im_rc::ordmap::DiffItem;
use im_rc::ordmap::Entry;
use im_rc::ordmap::OrdMap;
use im_rc::OrdSet;
@@ -624,210 +623,40 @@
fn merge_copies_dict(
path_map: &TwoWayPathMap,
current_merge: Revision,
- mut minor: InternalPathCopies,
- mut major: InternalPathCopies,
+ minor: InternalPathCopies,
+ major: InternalPathCopies,
changes: &ChangedFiles,
) -> InternalPathCopies {
- // This closure exist as temporary help while multiple developper are
- // actively working on this code. Feel free to re-inline it once this
- // code is more settled.
- let cmp_value =
- |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
- compare_value(
- path_map,
+ use crate::utils::{ordmap_union_with_merge, MergeResult};
+
+ ordmap_union_with_merge(minor, major, |dest, src_minor, src_major| {
+ let (pick, overwrite) = compare_value(
+ path_map,
+ current_merge,
+ changes,
+ dest,
+ src_minor,
+ src_major,
+ );
+ if overwrite {
+ let (winner, loser) = match pick {
+ MergePick::Major | MergePick::Any => (src_major, src_minor),
+ MergePick::Minor => (src_minor, src_major),
+ };
+ MergeResult::UseNewValue(CopySource::new_from_merge(
current_merge,
- changes,
- dest,
- src_minor,
- src_major,
- )
- };
- if minor.is_empty() {
- major
- } else if major.is_empty() {
- minor
- } else if minor.len() * 2 < major.len() {
- // Lets says we are merging two InternalPathCopies instance A and B.
- //
- // If A contains N items, the merge result will never contains more
- // than N values differents than the one in A
- //
- // If B contains M items, with M > N, the merge result will always
- // result in a minimum of M - N value differents than the on in
- // A
- //
- // As a result, if N < (M-N), we know that simply iterating over A will
- // yield less difference than iterating over the difference
- // between A and B.
- //
- // This help performance a lot in case were a tiny
- // InternalPathCopies is merged with a much larger one.
- for (dest, src_minor) in minor {
- let src_major = major.get(&dest);
- match src_major {
- None => {
- major.insert(dest, src_minor);
- }
- Some(src_major) => {
- let (pick, overwrite) =
- cmp_value(&dest, &src_minor, src_major);
- if overwrite {
- let src = match pick {
- MergePick::Major => CopySource::new_from_merge(
- current_merge,
- src_major,
- &src_minor,
- ),
- MergePick::Minor => CopySource::new_from_merge(
- current_merge,
- &src_minor,
- src_major,
- ),
- MergePick::Any => CopySource::new_from_merge(
- current_merge,
- src_major,
- &src_minor,
- ),
- };
- major.insert(dest, src);
- } else {
- match pick {
- MergePick::Any | MergePick::Major => None,
- MergePick::Minor => major.insert(dest, src_minor),
- };
- }
- }
- };
- }
- major
- } else if major.len() * 2 < minor.len() {
- // This use the same rational than the previous block.
- // (Check previous block documentation for details.)
- for (dest, src_major) in major {
- let src_minor = minor.get(&dest);
- match src_minor {
- None => {
- minor.insert(dest, src_major);
+ winner,
+ loser,
+ ))
+ } else {
+ match pick {
+ MergePick::Any | MergePick::Major => {
+ MergeResult::UseRightValue
}
- Some(src_minor) => {
- let (pick, overwrite) =
- cmp_value(&dest, src_minor, &src_major);
- if overwrite {
- let src = match pick {
- MergePick::Major => CopySource::new_from_merge(
- current_merge,
- &src_major,
- src_minor,
- ),
- MergePick::Minor => CopySource::new_from_merge(
- current_merge,
- src_minor,
- &src_major,
- ),
- MergePick::Any => CopySource::new_from_merge(
- current_merge,
- &src_major,
- src_minor,
- ),
- };
- minor.insert(dest, src);
- } else {
- match pick {
- MergePick::Any | MergePick::Minor => None,
- MergePick::Major => minor.insert(dest, src_major),
- };
- }
- }
- };
- }
- minor
- } else {
- let mut override_minor = Vec::new();
- let mut override_major = Vec::new();
-
- let mut to_major = |k: &PathToken, v: &CopySource| {
- override_major.push((k.clone(), v.clone()))
- };
- let mut to_minor = |k: &PathToken, v: &CopySource| {
- override_minor.push((k.clone(), v.clone()))
- };
-
- // The diff function leverage detection of the identical subpart if
- // minor and major has some common ancestors. This make it very
- // fast is most case.
- //
- // In case where the two map are vastly different in size, the current
- // approach is still slowish because the iteration will iterate over
- // all the "exclusive" content of the larger on. This situation can be
- // frequent when the subgraph of revision we are processing has a lot
- // of roots. Each roots adding they own fully new map to the mix (and
- // likely a small map, if the path from the root to the "main path" is
- // small.
- //
- // We could do better by detecting such situation and processing them
- // differently.
- for d in minor.diff(&major) {
- match d {
- DiffItem::Add(k, v) => to_minor(k, v),
- DiffItem::Remove(k, v) => to_major(k, v),
- DiffItem::Update { old, new } => {
- let (dest, src_major) = new;
- let (_, src_minor) = old;
- let (pick, overwrite) =
- cmp_value(dest, src_minor, src_major);
- if overwrite {
- let src = match pick {
- MergePick::Major => CopySource::new_from_merge(
- current_merge,
- src_major,
- src_minor,
- ),
- MergePick::Minor => CopySource::new_from_merge(
- current_merge,
- src_minor,
- src_major,
- ),
- MergePick::Any => CopySource::new_from_merge(
- current_merge,
- src_major,
- src_minor,
- ),
- };
- to_minor(dest, &src);
- to_major(dest, &src);
- } else {
- match pick {
- MergePick::Major => to_minor(dest, src_major),
- MergePick::Minor => to_major(dest, src_minor),
- // If the two entry are identical, no need to do
- // anything (but diff should not have yield them)
- MergePick::Any => unreachable!(),
- }
- }
- }
- };
- }
-
- let updates;
- let mut result;
- if override_major.is_empty() {
- result = major
- } else if override_minor.is_empty() {
- result = minor
- } else {
- if override_minor.len() < override_major.len() {
- updates = override_minor;
- result = minor;
- } else {
- updates = override_major;
- result = major;
- }
- for (k, v) in updates {
- result.insert(k, v);
+ MergePick::Minor => MergeResult::UseLeftValue,
}
}
- result
- }
+ })
}
/// represent the side that should prevail when merging two
--- a/rust/hg-core/src/utils.rs Mon Dec 21 12:34:59 2020 +0100
+++ b/rust/hg-core/src/utils.rs Wed Dec 23 11:48:16 2020 +0100
@@ -9,6 +9,8 @@
use crate::errors::{HgError, IoErrorContext};
use crate::utils::hg_path::HgPath;
+use im_rc::ordmap::DiffItem;
+use im_rc::ordmap::OrdMap;
use std::{io::Write, ops::Deref};
pub mod files;
@@ -199,3 +201,151 @@
context: IoErrorContext::CurrentExe,
})
}
+
+pub(crate) enum MergeResult<V> {
+ UseLeftValue,
+ UseRightValue,
+ UseNewValue(V),
+}
+
+/// Return the union of the two given maps,
+/// calling `merge(key, left_value, right_value)` to resolve keys that exist in
+/// both.
+///
+/// CC https://github.com/bodil/im-rs/issues/166
+pub(crate) fn ordmap_union_with_merge<K, V>(
+ left: OrdMap<K, V>,
+ right: OrdMap<K, V>,
+ mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
+) -> OrdMap<K, V>
+where
+ K: Clone + Ord,
+ V: Clone + PartialEq,
+{
+ if left.ptr_eq(&right) {
+ // One of the two maps is an unmodified clone of the other
+ left
+ } else if left.len() / 2 > right.len() {
+ // When two maps have different sizes,
+ // their size difference is a lower bound on
+ // how many keys of the larger map are not also in the smaller map.
+ // This in turn is a lower bound on the number of differences in
+ // `OrdMap::diff` and the "amount of work" that would be done
+ // by `ordmap_union_with_merge_by_diff`.
+ //
+ // Here `left` is more than twice the size of `right`,
+ // so the number of differences is more than the total size of
+ // `right`. Therefore an algorithm based on iterating `right`
+ // is more efficient.
+ //
+ // This helps a lot when a tiny (or empty) map is merged
+ // with a large one.
+ ordmap_union_with_merge_by_iter(left, right, merge)
+ } else if left.len() < right.len() / 2 {
+ // Same as above but with `left` and `right` swapped
+ ordmap_union_with_merge_by_iter(right, left, |key, a, b| {
+ // Also swapped in `merge` arguments:
+ match merge(key, b, a) {
+ MergeResult::UseNewValue(v) => MergeResult::UseNewValue(v),
+ // … and swap back in `merge` result:
+ MergeResult::UseLeftValue => MergeResult::UseRightValue,
+ MergeResult::UseRightValue => MergeResult::UseLeftValue,
+ }
+ })
+ } else {
+ // For maps of similar size, use the algorithm based on `OrdMap::diff`
+ ordmap_union_with_merge_by_diff(left, right, merge)
+ }
+}
+
+/// Efficient if `right` is much smaller than `left`
+fn ordmap_union_with_merge_by_iter<K, V>(
+ mut left: OrdMap<K, V>,
+ right: OrdMap<K, V>,
+ mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
+) -> OrdMap<K, V>
+where
+ K: Clone + Ord,
+ V: Clone,
+{
+ for (key, right_value) in right {
+ match left.get(&key) {
+ None => {
+ left.insert(key, right_value);
+ }
+ Some(left_value) => match merge(&key, left_value, &right_value) {
+ MergeResult::UseLeftValue => {}
+ MergeResult::UseRightValue => {
+ left.insert(key, right_value);
+ }
+ MergeResult::UseNewValue(new_value) => {
+ left.insert(key, new_value);
+ }
+ },
+ }
+ }
+ left
+}
+
+/// Fallback when both maps are of similar size
+fn ordmap_union_with_merge_by_diff<K, V>(
+ mut left: OrdMap<K, V>,
+ mut right: OrdMap<K, V>,
+ mut merge: impl FnMut(&K, &V, &V) -> MergeResult<V>,
+) -> OrdMap<K, V>
+where
+ K: Clone + Ord,
+ V: Clone + PartialEq,
+{
+ // (key, value) pairs that would need to be inserted in either map
+ // in order to turn it into the union.
+ //
+ // TODO: if/when https://github.com/bodil/im-rs/pull/168 is accepted,
+ // change these from `Vec<(K, V)>` to `Vec<(&K, Cow<V>)>`
+ // with `left_updates` only borrowing from `right` and `right_updates` from
+ // `left`, and with `Cow::Owned` used for `MergeResult::UseNewValue`.
+ //
+ // This would allow moving all `.clone()` calls to after we’ve decided
+ // which of `right_updates` or `left_updates` to use
+ // (value ones becoming `Cow::into_owned`),
+ // and avoid making clones we don’t end up using.
+ let mut left_updates = Vec::new();
+ let mut right_updates = Vec::new();
+
+ for difference in left.diff(&right) {
+ match difference {
+ DiffItem::Add(key, value) => {
+ left_updates.push((key.clone(), value.clone()))
+ }
+ DiffItem::Remove(key, value) => {
+ right_updates.push((key.clone(), value.clone()))
+ }
+ DiffItem::Update {
+ old: (key, left_value),
+ new: (_, right_value),
+ } => match merge(key, left_value, right_value) {
+ MergeResult::UseLeftValue => {
+ right_updates.push((key.clone(), left_value.clone()))
+ }
+ MergeResult::UseRightValue => {
+ left_updates.push((key.clone(), right_value.clone()))
+ }
+ MergeResult::UseNewValue(new_value) => {
+ left_updates.push((key.clone(), new_value.clone()));
+ right_updates.push((key.clone(), new_value))
+ }
+ },
+ }
+ }
+ if left_updates.len() < right_updates.len() {
+ for (key, value) in left_updates {
+ left.insert(key, value);
+ }
+ left
+ } else {
+ for (key, value) in right_updates {
+ right.insert(key, value);
+ }
+ right
+ }
+}