copies-rust: split up combine_changeset_copies function into a struct
… such that each iteration of its former loop is now a method call,
with the caller driving the loop.
This entirely removes the need for the `DataHolder` hack:
the method now takes a `ChangedFiles<'_>` parameter that borrows
a bytes buffer that can be owned by the caller’s stack frame,
just for the duration of that call.
Differential Revision: https://phab.mercurial-scm.org/D9683
--- a/rust/hg-core/src/copy_tracing.rs Wed Dec 23 11:48:16 2020 +0100
+++ b/rust/hg-core/src/copy_tracing.rs Tue Jan 05 21:02:00 2021 +0100
@@ -110,9 +110,6 @@
/// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
type InternalPathCopies = OrdMap<PathToken, CopySource>;
-/// hold parent 1, parent 2 and relevant files actions.
-pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
-
/// represent the files affected by a changesets
///
/// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
@@ -334,22 +331,6 @@
}
}
-/// A small struct whose purpose is to ensure lifetime of bytes referenced in
-/// ChangedFiles
-///
-/// It is passed to the RevInfoMaker callback who can assign any necessary
-/// content to the `data` attribute. The copy tracing code is responsible for
-/// keeping the DataHolder alive at least as long as the ChangedFiles object.
-pub struct DataHolder<D> {
- /// RevInfoMaker callback should assign data referenced by the
- /// ChangedFiles struct it return to this attribute. The DataHolder
- /// lifetime will be at least as long as the ChangedFiles one.
- pub data: Option<D>,
-}
-
-pub type RevInfoMaker<'a, D> =
- Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
-
/// A small "tokenizer" responsible of turning full HgPath into lighter
/// PathToken
///
@@ -382,82 +363,89 @@
}
/// Same as mercurial.copies._combine_changeset_copies, but in Rust.
-///
-/// Arguments are:
-///
-/// revs: all revisions to be considered
-/// children: a {parent ? [childrens]} mapping
-/// target_rev: the final revision we are combining copies to
-/// rev_info(rev): callback to get revision information:
-/// * first parent
-/// * second parent
-/// * ChangedFiles
-/// isancestors(low_rev, high_rev): callback to check if a revision is an
-/// ancestor of another
-pub fn combine_changeset_copies<D>(
- revs: Vec<Revision>,
- mut children_count: HashMap<Revision, usize>,
- target_rev: Revision,
- rev_info: RevInfoMaker<D>,
-) -> PathCopies {
- let mut all_copies = HashMap::new();
+pub struct CombineChangesetCopies {
+ all_copies: HashMap<Revision, InternalPathCopies>,
+ path_map: TwoWayPathMap,
+ children_count: HashMap<Revision, usize>,
+}
- let mut path_map = TwoWayPathMap::default();
+impl CombineChangesetCopies {
+ pub fn new(children_count: HashMap<Revision, usize>) -> Self {
+ Self {
+ all_copies: HashMap::new(),
+ path_map: TwoWayPathMap::default(),
+ children_count,
+ }
+ }
- for rev in revs {
- let mut d: DataHolder<D> = DataHolder { data: None };
- let (p1, p2, changes) = rev_info(rev, &mut d);
-
- // We will chain the copies information accumulated for the parent with
- // the individual copies information the curent revision. Creating a
- // new TimeStampedPath for each `rev` → `children` vertex.
+ /// Combined the given `changes` data specific to `rev` with the data
+ /// previously given for its parents (and transitively, its ancestors).
+ pub fn add_revision(
+ &mut self,
+ rev: Revision,
+ p1: Revision,
+ p2: Revision,
+ changes: ChangedFiles<'_>,
+ ) {
// Retrieve data computed in a previous iteration
let p1_copies = match p1 {
NULL_REVISION => None,
_ => get_and_clean_parent_copies(
- &mut all_copies,
- &mut children_count,
+ &mut self.all_copies,
+ &mut self.children_count,
p1,
), // will be None if the vertex is not to be traversed
};
let p2_copies = match p2 {
NULL_REVISION => None,
_ => get_and_clean_parent_copies(
- &mut all_copies,
- &mut children_count,
+ &mut self.all_copies,
+ &mut self.children_count,
p2,
), // will be None if the vertex is not to be traversed
};
// combine it with data for that revision
- let (p1_copies, p2_copies) =
- chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev);
+ let (p1_copies, p2_copies) = chain_changes(
+ &mut self.path_map,
+ p1_copies,
+ p2_copies,
+ &changes,
+ rev,
+ );
let copies = match (p1_copies, p2_copies) {
(None, None) => None,
(c, None) => c,
(None, c) => c,
(Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict(
- &path_map, rev, p2_copies, p1_copies, &changes,
+ &self.path_map,
+ rev,
+ p2_copies,
+ p1_copies,
+ &changes,
)),
};
if let Some(c) = copies {
- all_copies.insert(rev, c);
+ self.all_copies.insert(rev, c);
}
}
- // Drop internal information (like the timestamp) and return the final
- // mapping.
- let tt_result = all_copies
- .remove(&target_rev)
- .expect("target revision was not processed");
- let mut result = PathCopies::default();
- for (dest, tt_source) in tt_result {
- if let Some(path) = tt_source.path {
- let path_dest = path_map.untokenize(dest).to_owned();
- let path_path = path_map.untokenize(path).to_owned();
- result.insert(path_dest, path_path);
+ /// Drop intermediate data (such as which revision a copy was from) and
+ /// return the final mapping.
+ pub fn finish(mut self, target_rev: Revision) -> PathCopies {
+ let tt_result = self
+ .all_copies
+ .remove(&target_rev)
+ .expect("target revision was not processed");
+ let mut result = PathCopies::default();
+ for (dest, tt_source) in tt_result {
+ if let Some(path) = tt_source.path {
+ let path_dest = self.path_map.untokenize(dest).to_owned();
+ let path_path = self.path_map.untokenize(path).to_owned();
+ result.insert(path_dest, path_path);
+ }
}
+ result
}
- result
}
/// fetch previous computed information
--- a/rust/hg-cpython/src/copy_tracing.rs Wed Dec 23 11:48:16 2020 +0100
+++ b/rust/hg-cpython/src/copy_tracing.rs Tue Jan 05 21:02:00 2021 +0100
@@ -8,11 +8,8 @@
use cpython::PyTuple;
use cpython::Python;
-use hg::copy_tracing::combine_changeset_copies;
use hg::copy_tracing::ChangedFiles;
-use hg::copy_tracing::DataHolder;
-use hg::copy_tracing::RevInfo;
-use hg::copy_tracing::RevInfoMaker;
+use hg::copy_tracing::CombineChangesetCopies;
use hg::Revision;
/// Combines copies information contained into revision `revs` to build a copy
@@ -26,64 +23,41 @@
target_rev: Revision,
rev_info: PyObject,
) -> PyResult<PyDict> {
- let revs: PyResult<_> =
- revs.iter(py).map(|r| Ok(r.extract(py)?)).collect();
-
- // Wrap the `rev_info_maker` python callback as a Rust closure
- //
- // No errors are expected from the Python side, and they will should only
- // happens in case of programing error or severe data corruption. Such
- // errors will raise panic and the rust-cpython harness will turn them into
- // Python exception.
- let rev_info_maker: RevInfoMaker<PyBytes> =
- Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
- let res: PyTuple = rev_info
- .call(py, (rev,), None)
- .expect("rust-copy-tracing: python call to `rev_info` failed")
- .cast_into(py)
- .expect(
- "rust-copy_tracing: python call to `rev_info` returned \
- unexpected non-Tuple value",
- );
- let p1 = res.get_item(py, 0).extract(py).expect(
- "rust-copy-tracing: rev_info return is invalid, first item \
- is a not a revision",
- );
- let p2 = res.get_item(py, 1).extract(py).expect(
- "rust-copy-tracing: rev_info return is invalid, first item \
- is a not a revision",
- );
-
- let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
- Ok(raw) => {
- // Give responsability for the raw bytes lifetime to
- // hg-core
- d.data = Some(raw);
- let addrs = d.data.as_ref().expect(
- "rust-copy-tracing: failed to get a reference to the \
- raw bytes for copy data").data(py);
- ChangedFiles::new(addrs)
- }
- // value was presumably None, meaning they was no copy data.
- Err(_) => ChangedFiles::new_empty(),
- };
-
- (p1, p2, files)
- });
- let children_count: PyResult<_> = children_count
+ let children_count = children_count
.items(py)
.iter()
.map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
- .collect();
+ .collect::<PyResult<_>>()?;
+
+ /// (Revision number, parent 1, parent 2, copy data for this revision)
+ type RevInfo = (Revision, Revision, Revision, Option<PyBytes>);
+
+ let revs_info = revs.iter(py).map(|rev_py| -> PyResult<RevInfo> {
+ let rev = rev_py.extract(py)?;
+ let tuple: PyTuple =
+ rev_info.call(py, (rev_py,), None)?.cast_into(py)?;
+ let p1 = tuple.get_item(py, 0).extract(py)?;
+ let p2 = tuple.get_item(py, 1).extract(py)?;
+ let opt_bytes = tuple.get_item(py, 2).extract(py)?;
+ Ok((rev, p1, p2, opt_bytes))
+ });
- let res = combine_changeset_copies(
- revs?,
- children_count?,
- target_rev,
- rev_info_maker,
- );
+ let mut combine_changeset_copies =
+ CombineChangesetCopies::new(children_count);
+
+ for rev_info in revs_info {
+ let (rev, p1, p2, opt_bytes) = rev_info?;
+ let files = match &opt_bytes {
+ Some(bytes) => ChangedFiles::new(bytes.data(py)),
+ // value was presumably None, meaning they was no copy data.
+ None => ChangedFiles::new_empty(),
+ };
+
+ combine_changeset_copies.add_revision(rev, p1, p2, files)
+ }
+ let path_copies = combine_changeset_copies.finish(target_rev);
let out = PyDict::new(py);
- for (dest, source) in res.into_iter() {
+ for (dest, source) in path_copies.into_iter() {
out.set_item(
py,
PyBytes::new(py, &dest.into_vec()),