Mercurial > hg-stable
changeset 46626:cb4b0b0c6de4
copies-rust: split up combine_changeset_copies function into a struct
… such that each iteration of its former loop is now a method call,
with the caller driving the loop.
This entirely removes the need for the `DataHolder` hack:
the method now takes a `ChangedFiles<'_>` parameter that borrows
a bytes buffer that can be owned by the caller’s stack frame,
just for the duration of that call.
Differential Revision: https://phab.mercurial-scm.org/D9683
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Tue, 05 Jan 2021 21:02:00 +0100 |
parents | 435d9fc72646 |
children | 47557ea79fc7 |
files | rust/hg-core/src/copy_tracing.rs rust/hg-cpython/src/copy_tracing.rs |
diffstat | 2 files changed, 85 insertions(+), 123 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/copy_tracing.rs Wed Dec 23 11:48:16 2020 +0100 +++ b/rust/hg-core/src/copy_tracing.rs Tue Jan 05 21:02:00 2021 +0100 @@ -110,9 +110,6 @@ /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation) type InternalPathCopies = OrdMap<PathToken, CopySource>; -/// hold parent 1, parent 2 and relevant files actions. -pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>); - /// represent the files affected by a changesets /// /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need @@ -334,22 +331,6 @@ } } -/// A small struct whose purpose is to ensure lifetime of bytes referenced in -/// ChangedFiles -/// -/// It is passed to the RevInfoMaker callback who can assign any necessary -/// content to the `data` attribute. The copy tracing code is responsible for -/// keeping the DataHolder alive at least as long as the ChangedFiles object. -pub struct DataHolder<D> { - /// RevInfoMaker callback should assign data referenced by the - /// ChangedFiles struct it return to this attribute. The DataHolder - /// lifetime will be at least as long as the ChangedFiles one. - pub data: Option<D>, -} - -pub type RevInfoMaker<'a, D> = - Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>; - /// A small "tokenizer" responsible of turning full HgPath into lighter /// PathToken /// @@ -382,82 +363,89 @@ } /// Same as mercurial.copies._combine_changeset_copies, but in Rust. -/// -/// Arguments are: -/// -/// revs: all revisions to be considered -/// children: a {parent ? [childrens]} mapping -/// target_rev: the final revision we are combining copies to -/// rev_info(rev): callback to get revision information: -/// * first parent -/// * second parent -/// * ChangedFiles -/// isancestors(low_rev, high_rev): callback to check if a revision is an -/// ancestor of another -pub fn combine_changeset_copies<D>( - revs: Vec<Revision>, - mut children_count: HashMap<Revision, usize>, - target_rev: Revision, - rev_info: RevInfoMaker<D>, -) -> PathCopies { - let mut all_copies = HashMap::new(); +pub struct CombineChangesetCopies { + all_copies: HashMap<Revision, InternalPathCopies>, + path_map: TwoWayPathMap, + children_count: HashMap<Revision, usize>, +} - let mut path_map = TwoWayPathMap::default(); +impl CombineChangesetCopies { + pub fn new(children_count: HashMap<Revision, usize>) -> Self { + Self { + all_copies: HashMap::new(), + path_map: TwoWayPathMap::default(), + children_count, + } + } - for rev in revs { - let mut d: DataHolder<D> = DataHolder { data: None }; - let (p1, p2, changes) = rev_info(rev, &mut d); - - // We will chain the copies information accumulated for the parent with - // the individual copies information the curent revision. Creating a - // new TimeStampedPath for each `rev` → `children` vertex. + /// Combined the given `changes` data specific to `rev` with the data + /// previously given for its parents (and transitively, its ancestors). + pub fn add_revision( + &mut self, + rev: Revision, + p1: Revision, + p2: Revision, + changes: ChangedFiles<'_>, + ) { // Retrieve data computed in a previous iteration let p1_copies = match p1 { NULL_REVISION => None, _ => get_and_clean_parent_copies( - &mut all_copies, - &mut children_count, + &mut self.all_copies, + &mut self.children_count, p1, ), // will be None if the vertex is not to be traversed }; let p2_copies = match p2 { NULL_REVISION => None, _ => get_and_clean_parent_copies( - &mut all_copies, - &mut children_count, + &mut self.all_copies, + &mut self.children_count, p2, ), // will be None if the vertex is not to be traversed }; // combine it with data for that revision - let (p1_copies, p2_copies) = - chain_changes(&mut path_map, p1_copies, p2_copies, &changes, rev); + let (p1_copies, p2_copies) = chain_changes( + &mut self.path_map, + p1_copies, + p2_copies, + &changes, + rev, + ); let copies = match (p1_copies, p2_copies) { (None, None) => None, (c, None) => c, (None, c) => c, (Some(p1_copies), Some(p2_copies)) => Some(merge_copies_dict( - &path_map, rev, p2_copies, p1_copies, &changes, + &self.path_map, + rev, + p2_copies, + p1_copies, + &changes, )), }; if let Some(c) = copies { - all_copies.insert(rev, c); + self.all_copies.insert(rev, c); } } - // Drop internal information (like the timestamp) and return the final - // mapping. - let tt_result = all_copies - .remove(&target_rev) - .expect("target revision was not processed"); - let mut result = PathCopies::default(); - for (dest, tt_source) in tt_result { - if let Some(path) = tt_source.path { - let path_dest = path_map.untokenize(dest).to_owned(); - let path_path = path_map.untokenize(path).to_owned(); - result.insert(path_dest, path_path); + /// Drop intermediate data (such as which revision a copy was from) and + /// return the final mapping. + pub fn finish(mut self, target_rev: Revision) -> PathCopies { + let tt_result = self + .all_copies + .remove(&target_rev) + .expect("target revision was not processed"); + let mut result = PathCopies::default(); + for (dest, tt_source) in tt_result { + if let Some(path) = tt_source.path { + let path_dest = self.path_map.untokenize(dest).to_owned(); + let path_path = self.path_map.untokenize(path).to_owned(); + result.insert(path_dest, path_path); + } } + result } - result } /// fetch previous computed information
--- a/rust/hg-cpython/src/copy_tracing.rs Wed Dec 23 11:48:16 2020 +0100 +++ b/rust/hg-cpython/src/copy_tracing.rs Tue Jan 05 21:02:00 2021 +0100 @@ -8,11 +8,8 @@ use cpython::PyTuple; use cpython::Python; -use hg::copy_tracing::combine_changeset_copies; use hg::copy_tracing::ChangedFiles; -use hg::copy_tracing::DataHolder; -use hg::copy_tracing::RevInfo; -use hg::copy_tracing::RevInfoMaker; +use hg::copy_tracing::CombineChangesetCopies; use hg::Revision; /// Combines copies information contained into revision `revs` to build a copy @@ -26,64 +23,41 @@ target_rev: Revision, rev_info: PyObject, ) -> PyResult<PyDict> { - let revs: PyResult<_> = - revs.iter(py).map(|r| Ok(r.extract(py)?)).collect(); - - // Wrap the `rev_info_maker` python callback as a Rust closure - // - // No errors are expected from the Python side, and they will should only - // happens in case of programing error or severe data corruption. Such - // errors will raise panic and the rust-cpython harness will turn them into - // Python exception. - let rev_info_maker: RevInfoMaker<PyBytes> = - Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo { - let res: PyTuple = rev_info - .call(py, (rev,), None) - .expect("rust-copy-tracing: python call to `rev_info` failed") - .cast_into(py) - .expect( - "rust-copy_tracing: python call to `rev_info` returned \ - unexpected non-Tuple value", - ); - let p1 = res.get_item(py, 0).extract(py).expect( - "rust-copy-tracing: rev_info return is invalid, first item \ - is a not a revision", - ); - let p2 = res.get_item(py, 1).extract(py).expect( - "rust-copy-tracing: rev_info return is invalid, first item \ - is a not a revision", - ); - - let files = match res.get_item(py, 2).extract::<PyBytes>(py) { - Ok(raw) => { - // Give responsability for the raw bytes lifetime to - // hg-core - d.data = Some(raw); - let addrs = d.data.as_ref().expect( - "rust-copy-tracing: failed to get a reference to the \ - raw bytes for copy data").data(py); - ChangedFiles::new(addrs) - } - // value was presumably None, meaning they was no copy data. - Err(_) => ChangedFiles::new_empty(), - }; - - (p1, p2, files) - }); - let children_count: PyResult<_> = children_count + let children_count = children_count .items(py) .iter() .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?))) - .collect(); + .collect::<PyResult<_>>()?; + + /// (Revision number, parent 1, parent 2, copy data for this revision) + type RevInfo = (Revision, Revision, Revision, Option<PyBytes>); + + let revs_info = revs.iter(py).map(|rev_py| -> PyResult<RevInfo> { + let rev = rev_py.extract(py)?; + let tuple: PyTuple = + rev_info.call(py, (rev_py,), None)?.cast_into(py)?; + let p1 = tuple.get_item(py, 0).extract(py)?; + let p2 = tuple.get_item(py, 1).extract(py)?; + let opt_bytes = tuple.get_item(py, 2).extract(py)?; + Ok((rev, p1, p2, opt_bytes)) + }); - let res = combine_changeset_copies( - revs?, - children_count?, - target_rev, - rev_info_maker, - ); + let mut combine_changeset_copies = + CombineChangesetCopies::new(children_count); + + for rev_info in revs_info { + let (rev, p1, p2, opt_bytes) = rev_info?; + let files = match &opt_bytes { + Some(bytes) => ChangedFiles::new(bytes.data(py)), + // value was presumably None, meaning they was no copy data. + None => ChangedFiles::new_empty(), + }; + + combine_changeset_copies.add_revision(rev, p1, p2, files) + } + let path_copies = combine_changeset_copies.finish(target_rev); let out = PyDict::new(py); - for (dest, source) in res.into_iter() { + for (dest, source) in path_copies.into_iter() { out.set_item( py, PyBytes::new(py, &dest.into_vec()),