copies-rust: split up combine_changeset_copies function into a struct
authorSimon Sapin <simon.sapin@octobus.net>
Tue, 05 Jan 2021 21:02:00 +0100
changeset 46587 cb4b0b0c6de4
parent 46586 435d9fc72646
child 46588 47557ea79fc7
copies-rust: split up combine_changeset_copies function into a struct … such that each iteration of its former loop is now a method call, with the caller driving the loop. This entirely removes the need for the `DataHolder` hack: the method now takes a `ChangedFiles<'_>` parameter that borrows a bytes buffer that can be owned by the caller’s stack frame, just for the duration of that call. Differential Revision: https://phab.mercurial-scm.org/D9683
rust/hg-core/src/copy_tracing.rs
rust/hg-cpython/src/copy_tracing.rs
--- a/rust/hg-core/src/copy_tracing.rs	Wed Dec 23 11:48:16 2020 +0100
+++ b/rust/hg-core/src/copy_tracing.rs	Tue Jan 05 21:02:00 2021 +0100
@@ -110,9 +110,6 @@
 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
 type InternalPathCopies = OrdMap<PathToken, CopySource>;
 
-/// hold parent 1, parent 2 and relevant files actions.
-pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
-
 /// represent the files affected by a changesets
 ///
 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
@@ -334,22 +331,6 @@
     }
 }
 
-/// A small struct whose purpose is to ensure lifetime of bytes referenced in
-/// ChangedFiles
-///
-/// It is passed to the RevInfoMaker callback who can assign any necessary
-/// content to the `data` attribute. The copy tracing code is responsible for
-/// keeping the DataHolder alive at least as long as the ChangedFiles object.
-pub struct DataHolder<D> {
-    /// RevInfoMaker callback should assign data referenced by the
-    /// ChangedFiles struct it return to this attribute. The DataHolder
-    /// lifetime will be at least as long as the ChangedFiles one.
-    pub data: Option<D>,
-}
-
-pub type RevInfoMaker<'a, D> =
-    Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
-
 /// A small "tokenizer" responsible of turning full HgPath into lighter
 /// PathToken
 ///
@@ -382,82 +363,89 @@
 }
 
 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
-///
-/// Arguments are:
-///
-/// revs: all revisions to be considered
-/// children: a {parent ? [childrens]} mapping
-/// target_rev: the final revision we are combining copies to
-/// rev_info(rev): callback to get revision information:
-///   * first parent
-///   * second parent
-///   * ChangedFiles
-/// isancestors(low_rev, high_rev): callback to check if a revision is an
-///                                 ancestor of another
-pub fn combine_changeset_copies<D>(
-    revs: Vec<Revision>,
-    mut children_count: HashMap<Revision, usize>,
-    target_rev: Revision,
-    rev_info: RevInfoMaker<D>,
-) -> PathCopies {
-    let mut all_copies = HashMap::new();
+pub struct CombineChangesetCopies {
+    all_copies: HashMap<Revision, InternalPathCopies>,
+    path_map: TwoWayPathMap,
+    children_count: HashMap<Revision, usize>,
+}
 
-    let mut path_map = TwoWayPathMap::default();
+impl CombineChangesetCopies {
+    pub fn new(children_count: HashMap<Revision, usize>) -> Self {
+        Self {
+            all_copies: HashMap::new(),
+            path_map: TwoWayPathMap::default(),
+            children_count,
+        }
+    }
 
-    for rev in revs {
-        let mut d: DataHolder<D> = DataHolder { data: None };
-        let (p1, p2, changes) = rev_info(rev, &mut d);
-
-        // We will chain the copies information accumulated for the parent with
-        // the individual copies information the curent revision.  Creating a
-        // new TimeStampedPath for each `rev` → `children` vertex.
+    /// Combined the given `changes` data specific to `rev` with the data
+    /// previously given for its parents (and transitively, its ancestors).
+    pub fn add_revision(
+        &mut self,
+        rev: Revision,
+        p1: Revision,
+        p2: Revision,
+        changes: ChangedFiles<'_>,
+    ) {
         // Retrieve data computed in a previous iteration
         let p1_copies = match p1 {
             NULL_REVISION => None,
             _ => get_and_clean_parent_copies(
-                &mut all_copies,
-                &mut children_count,
+                &mut self.all_copies,
+                &mut self.children_count,
                 p1,
             ), // will be None if the vertex is not to be traversed
         };
         let p2_copies = match p2 {
             NULL_REVISION => None,
             _ => get_and_clean_parent_copies(
-                &mut all_copies,
-                &mut children_count,
+                &mut self.all_copies,
+                &mut self.children_count,
                 p2,
             ), // will be None if the vertex is not to be traversed
         };
         // combine it with data for that revision
-        let (p1_copies, p2_copies) =
-            chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
+        let (p1_copies, p2_copies) = chain_changes(
+            &mut self.path_map,
+            p1_copies,
+            p2_copies,
+            &changes,
+            rev,
+        );
         let copies = match (p1_copies, p2_copies) {
             (None, None) => None,
             (c, None) => c,
             (None, c) => c,
             (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
-                &path_map, rev, p2_copies, p1_copies, &changes,
+                &self.path_map,
+                rev,
+                p2_copies,
+                p1_copies,
+                &changes,
             )),
         };
         if let Some(c) = copies {
-            all_copies.insert(rev, c);
+            self.all_copies.insert(rev, c);
         }
     }
 
-    // Drop internal information (like the timestamp) and return the final
-    // mapping.
-    let tt_result = all_copies
-        .remove(&target_rev)
-        .expect("target revision was not processed");
-    let mut result = PathCopies::default();
-    for (dest, tt_source) in tt_result {
-        if let Some(path) = tt_source.path {
-            let path_dest = path_map.untokenize(dest).to_owned();
-            let path_path = path_map.untokenize(path).to_owned();
-            result.insert(path_dest, path_path);
+    /// Drop intermediate data (such as which revision a copy was from) and
+    /// return the final mapping.
+    pub fn finish(mut self, target_rev: Revision) -> PathCopies {
+        let tt_result = self
+            .all_copies
+            .remove(&target_rev)
+            .expect("target revision was not processed");
+        let mut result = PathCopies::default();
+        for (dest, tt_source) in tt_result {
+            if let Some(path) = tt_source.path {
+                let path_dest = self.path_map.untokenize(dest).to_owned();
+                let path_path = self.path_map.untokenize(path).to_owned();
+                result.insert(path_dest, path_path);
+            }
         }
+        result
     }
-    result
 }
 
 /// fetch previous computed information
--- a/rust/hg-cpython/src/copy_tracing.rs	Wed Dec 23 11:48:16 2020 +0100
+++ b/rust/hg-cpython/src/copy_tracing.rs	Tue Jan 05 21:02:00 2021 +0100
@@ -8,11 +8,8 @@
 use cpython::PyTuple;
 use cpython::Python;
 
-use hg::copy_tracing::combine_changeset_copies;
 use hg::copy_tracing::ChangedFiles;
-use hg::copy_tracing::DataHolder;
-use hg::copy_tracing::RevInfo;
-use hg::copy_tracing::RevInfoMaker;
+use hg::copy_tracing::CombineChangesetCopies;
 use hg::Revision;
 
 /// Combines copies information contained into revision `revs` to build a copy
@@ -26,64 +23,41 @@
     target_rev: Revision,
     rev_info: PyObject,
 ) -> PyResult<PyDict> {
-    let revs: PyResult<_> =
-        revs.iter(py).map(|r| Ok(r.extract(py)?)).collect();
-
-    // Wrap the `rev_info_maker` python callback as a Rust closure
-    //
-    // No errors are expected from the Python side, and they will should only
-    // happens in case of programing error or severe data corruption. Such
-    // errors will raise panic and the rust-cpython harness will turn them into
-    // Python exception.
-    let rev_info_maker: RevInfoMaker<PyBytes> =
-        Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
-            let res: PyTuple = rev_info
-                .call(py, (rev,), None)
-                .expect("rust-copy-tracing: python call to `rev_info` failed")
-                .cast_into(py)
-                .expect(
-                    "rust-copy_tracing: python call to `rev_info` returned \
-                    unexpected non-Tuple value",
-                );
-            let p1 = res.get_item(py, 0).extract(py).expect(
-                "rust-copy-tracing: rev_info return is invalid, first item \
-                is a not a revision",
-            );
-            let p2 = res.get_item(py, 1).extract(py).expect(
-                "rust-copy-tracing: rev_info return is invalid, first item \
-                is a not a revision",
-            );
-
-            let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
-                Ok(raw) => {
-                    // Give responsability for the raw bytes lifetime to
-                    // hg-core
-                    d.data = Some(raw);
-                    let addrs = d.data.as_ref().expect(
-                        "rust-copy-tracing: failed to get a reference to the \
-                        raw bytes for copy data").data(py);
-                    ChangedFiles::new(addrs)
-                }
-                // value was presumably None, meaning they was no copy data.
-                Err(_) => ChangedFiles::new_empty(),
-            };
-
-            (p1, p2, files)
-        });
-    let children_count: PyResult<_> = children_count
+    let children_count = children_count
         .items(py)
         .iter()
         .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
-        .collect();
+        .collect::<PyResult<_>>()?;
+
+    /// (Revision number, parent 1, parent 2, copy data for this revision)
+    type RevInfo = (Revision, Revision, Revision, Option<PyBytes>);
+
+    let revs_info = revs.iter(py).map(|rev_py| -> PyResult<RevInfo> {
+        let rev = rev_py.extract(py)?;
+        let tuple: PyTuple =
+            rev_info.call(py, (rev_py,), None)?.cast_into(py)?;
+        let p1 = tuple.get_item(py, 0).extract(py)?;
+        let p2 = tuple.get_item(py, 1).extract(py)?;
+        let opt_bytes = tuple.get_item(py, 2).extract(py)?;
+        Ok((rev, p1, p2, opt_bytes))
+    });
 
-    let res = combine_changeset_copies(
-        revs?,
-        children_count?,
-        target_rev,
-        rev_info_maker,
-    );
+    let mut combine_changeset_copies =
+        CombineChangesetCopies::new(children_count);
+
+    for rev_info in revs_info {
+        let (rev, p1, p2, opt_bytes) = rev_info?;
+        let files = match &opt_bytes {
+            Some(bytes) => ChangedFiles::new(bytes.data(py)),
+            // value was presumably None, meaning they was no copy data.
+            None => ChangedFiles::new_empty(),
+        };
+
+        combine_changeset_copies.add_revision(rev, p1, p2, files)
+    }
+    let path_copies = combine_changeset_copies.finish(target_rev);
     let out = PyDict::new(py);
-    for (dest, source) in res.into_iter() {
+    for (dest, source) in path_copies.into_iter() {
         out.set_item(
             py,
             PyBytes::new(py, &dest.into_vec()),