rust/hg-core/src/copy_tracing.rs
changeset 46151 c6bc77f7e593
parent 46149 294d5aca4ff5
child 46152 e166e8a035a7
--- a/rust/hg-core/src/copy_tracing.rs	Fri Nov 20 13:46:14 2020 +0100
+++ b/rust/hg-core/src/copy_tracing.rs	Sat Apr 25 12:37:46 2020 +0200
@@ -12,9 +12,9 @@
 
 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
 
-type PathToken = HgPathBuf;
+type PathToken = usize;
 
-#[derive(Clone, Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq, Copy)]
 struct TimeStampedPathCopy {
     /// revision at which the copy information was added
     rev: Revision,
@@ -314,6 +314,37 @@
     SecondParent,
 }
 
+/// A small "tokenizer" responsible of turning full HgPath into lighter
+/// PathToken
+///
+/// Dealing with small object, like integer is much faster, so HgPath input are
+/// turned into integer "PathToken" and converted back in the end.
+#[derive(Clone, Debug, Default)]
+struct TwoWayPathMap {
+    token: HashMap<HgPathBuf, PathToken>,
+    path: Vec<HgPathBuf>,
+}
+
+impl TwoWayPathMap {
+    fn tokenize(&mut self, path: &HgPath) -> PathToken {
+        match self.token.get(path) {
+            Some(a) => *a,
+            None => {
+                let a = self.token.len();
+                let buf = path.to_owned();
+                self.path.push(buf.clone());
+                self.token.insert(buf, a);
+                a
+            }
+        }
+    }
+
+    fn untokenize(&self, token: PathToken) -> &HgPathBuf {
+        assert!(token < self.path.len(), format!("Unknown token: {}", token));
+        &self.path[token]
+    }
+}
+
 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
 ///
 /// Arguments are:
@@ -337,6 +368,8 @@
     let mut all_copies = HashMap::new();
     let mut oracle = AncestorOracle::new(is_ancestor);
 
+    let mut path_map = TwoWayPathMap::default();
+
     for rev in revs {
         let mut d: DataHolder<D> = DataHolder { data: None };
         let (p1, p2, changes) = rev_info(rev, &mut d);
@@ -355,6 +388,7 @@
             if let Some(parent_copies) = parent_copies {
                 // combine it with data for that revision
                 let vertex_copies = add_from_changes(
+                    &mut path_map,
                     &parent_copies,
                     &changes,
                     Parent::FirstParent,
@@ -374,6 +408,7 @@
             if let Some(parent_copies) = parent_copies {
                 // combine it with data for that revision
                 let vertex_copies = add_from_changes(
+                    &mut path_map,
                     &parent_copies,
                     &changes,
                     Parent::SecondParent,
@@ -388,6 +423,7 @@
                     // If we got data from both parents, We need to combine
                     // them.
                     Some(copies) => Some(merge_copies_dict(
+                        &path_map,
                         vertex_copies,
                         copies,
                         &changes,
@@ -412,7 +448,9 @@
     let mut result = PathCopies::default();
     for (dest, tt_source) in tt_result {
         if let Some(path) = tt_source.path {
-            result.insert(dest, path);
+            let path_dest = path_map.untokenize(dest).to_owned();
+            let path_path = path_map.untokenize(path).to_owned();
+            result.insert(path_dest, path_path);
         }
     }
     result
@@ -447,6 +485,7 @@
 /// Combine ChangedFiles with some existing PathCopies information and return
 /// the result
 fn add_from_changes(
+    path_map: &mut TwoWayPathMap,
     base_copies: &TimeStampedPathCopies,
     changes: &ChangedFiles,
     parent: Parent,
@@ -455,9 +494,11 @@
     let mut copies = base_copies.clone();
     for action in changes.iter_actions(parent) {
         match action {
-            Action::Copied(dest, source) => {
+            Action::Copied(path_dest, path_source) => {
+                let dest = path_map.tokenize(path_dest);
+                let source = path_map.tokenize(path_source);
                 let entry;
-                if let Some(v) = base_copies.get(source) {
+                if let Some(v) = base_copies.get(&source) {
                     entry = match &v.path {
                         Some(path) => Some((*(path)).to_owned()),
                         None => Some(source.to_owned()),
@@ -475,18 +516,19 @@
                 };
                 copies.insert(dest.to_owned(), ttpc);
             }
-            Action::Removed(f) => {
+            Action::Removed(deleted_path) => {
                 // We must drop copy information for removed file.
                 //
                 // We need to explicitly record them as dropped to
                 // propagate this information when merging two
                 // TimeStampedPathCopies object.
-                if copies.contains_key(f.as_ref()) {
+                let deleted = path_map.tokenize(deleted_path);
+                if copies.contains_key(&deleted) {
                     let ttpc = TimeStampedPathCopy {
                         rev: current_rev,
                         path: None,
                     };
-                    copies.insert(f.to_owned(), ttpc);
+                    copies.insert(deleted, ttpc);
                 }
             }
         }
@@ -499,6 +541,7 @@
 /// In case of conflict, value from "major" will be picked, unless in some
 /// cases. See inline documentation for details.
 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
+    path_map: &TwoWayPathMap,
     mut minor: TimeStampedPathCopies,
     mut major: TimeStampedPathCopies,
     changes: &ChangedFiles,
@@ -511,7 +554,9 @@
         |dest: &PathToken,
          src_minor: &TimeStampedPathCopy,
          src_major: &TimeStampedPathCopy| {
-            compare_value(changes, oracle, dest, src_minor, src_major)
+            compare_value(
+                path_map, changes, oracle, dest, src_minor, src_major,
+            )
         };
     if minor.is_empty() {
         major
@@ -641,6 +686,7 @@
 /// decide which side prevails in case of conflicting values
 #[allow(clippy::if_same_then_else)]
 fn compare_value<A: Fn(Revision, Revision) -> bool>(
+    path_map: &TwoWayPathMap,
     changes: &ChangedFiles,
     oracle: &mut AncestorOracle<A>,
     dest: &PathToken,
@@ -664,7 +710,8 @@
             "conflict information from p1 and p2 in the same revision"
         );
     } else {
-        let action = changes.get_merge_case(&dest);
+        let dest_path = path_map.untokenize(*dest);
+        let action = changes.get_merge_case(dest_path);
         if src_major.path.is_none() && action == MergeCase::Salvaged {
             // If the file is "deleted" in the major side but was
             // salvaged by the merge, we keep the minor side alive