rust-cpython: bindings for MissingAncestors
authorGeorges Racinet <georges.racinet@octobus.net>
Fri, 30 Nov 2018 20:05:34 +0100
changeset 41188 006c9ce486fa
parent 41187 3bf6979a1785
child 41189 99125b7fb93e
rust-cpython: bindings for MissingAncestors The exposition is rather straightforward, except for the remove_ancestors_from() method, which forces us to an inefficient conversion between Python sets and Rust HashSets. Two alternatives are proposed in comments: - changing the inner API to "emit" the revision numbers to discard this would be a substantial change, and it would be better only in the cases where there are more to retain than to discard - mutating the Python set directly: this would force us to define an abstract `RevisionSet` trait, and implement it both for plain `HashSet` and for a struct enclosing a Python set with the GIL marker `Python<'p>`, also a non trivial effort. The main (and seemingly only) caller of this method being `mercurial.setdiscovery`, which is currently undergoing serious refactoring, it's not clear whether these improvements would be worth the effort right now, so we're leaving it as-is. Also, in `get_bases()` (will also be used by `setdiscovery`), we'd prefer to build a Python set directly, but we resort to returning a tuple, waiting to hear back from our PR onto rust-cpython about that Differential Revision: https://phab.mercurial-scm.org/D5550
rust/hg-cpython/src/ancestors.rs
tests/test-rust-ancestor.py
--- a/rust/hg-cpython/src/ancestors.rs	Wed Jan 09 17:31:36 2019 +0100
+++ b/rust/hg-cpython/src/ancestors.rs	Fri Nov 30 20:05:34 2018 +0100
@@ -15,22 +15,39 @@
 //!   The only difference is that it is instantiated with a C `parsers.index`
 //!   instance instead of a parents function.
 //!
+//! - [`MissingAncestors`] is the Rust implementation of
+//!   `mercurial.ancestor.incrementalmissingancestors`.
+//!
+//!   API differences:
+//!    + it is instantiated with a C `parsers.index`
+//!      instance instead of a parents function.
+//!    + `MissingAncestors.bases` is a method returning a tuple instead of
+//!      a set-valued attribute. We could return a Python set easily if our
+//!      [PySet PR](https://github.com/dgrunwald/rust-cpython/pull/165)
+//!      is accepted.
+//!
 //! - [`AncestorsIterator`] is the Rust counterpart of the
 //!   `ancestor._lazyancestorsiter` Python generator.
 //!   From Python, instances of this should be mainly obtained by calling
 //!   `iter()` on a [`LazyAncestors`] instance.
 //!
 //! [`LazyAncestors`]: struct.LazyAncestors.html
+//! [`MissingAncestors`]: struct.MissingAncestors.html
 //! [`AncestorsIterator`]: struct.AncestorsIterator.html
 use cindex::Index;
 use cpython::{
-    ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, Python,
+    ObjectProtocol, PyClone, PyDict, PyList, PyModule, PyObject,
+    PyResult, PyTuple, Python, PythonObject, ToPyObject,
 };
 use exceptions::GraphError;
 use hg::Revision;
-use hg::{AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy};
+use hg::{
+    AncestorsIterator as CoreIterator, LazyAncestors as CoreLazy,
+    MissingAncestors as CoreMissing,
+};
 use std::cell::RefCell;
 use std::iter::FromIterator;
+use std::collections::HashSet;
 
 /// Utility function to convert a Python iterable into various collections
 ///
@@ -119,7 +136,85 @@
 
 });
 
-/// Create the module, with `__package__` given from parent
+py_class!(pub class MissingAncestors |py| {
+    data inner: RefCell<Box<CoreMissing<Index>>>;
+
+    def __new__(_cls, index: PyObject, bases: PyObject) -> PyResult<MissingAncestors> {
+        let bases_vec: Vec<Revision> = rev_pyiter_collect(py, &bases)?;
+        let inner = CoreMissing::new(Index::new(py, index)?, bases_vec);
+        MissingAncestors::create_instance(py, RefCell::new(Box::new(inner)))
+    }
+
+    def hasbases(&self) -> PyResult<bool> {
+        Ok(self.inner(py).borrow().has_bases())
+    }
+
+    def addbases(&self, bases: PyObject) -> PyResult<PyObject> {
+        let mut inner = self.inner(py).borrow_mut();
+        let bases_vec: Vec<Revision> = rev_pyiter_collect(py, &bases)?;
+        inner.add_bases(bases_vec);
+        // cpython doc has examples with PyResult<()> but this gives me
+        //   the trait `cpython::ToPyObject` is not implemented for `()`
+        // so let's return an explicit None
+        Ok(py.None())
+    }
+
+    def bases(&self) -> PyResult<PyTuple> {
+        let inner = self.inner(py).borrow();
+        let bases_set = inner.get_bases();
+        // convert as Python tuple TODO how to return a proper Python set?
+        let mut bases_vec: Vec<PyObject> = Vec::with_capacity(
+            bases_set.len());
+        for rev in bases_set {
+            bases_vec.push(rev.to_py_object(py).into_object());
+        }
+        Ok(PyTuple::new(py, bases_vec.as_slice()))
+    }
+
+    def removeancestorsfrom(&self, revs: PyObject) -> PyResult<PyObject> {
+        let mut inner = self.inner(py).borrow_mut();
+        // this is very lame: we convert to a Rust set, update it in place
+        // and then convert back to Python, only to have Python remove the
+        // excess (thankfully, Python is happy with a list or even an iterator)
+        // Leads to improve this:
+        //  - have the CoreMissing instead do something emit revisions to
+        //    discard
+        //  - define a trait for sets of revisions in the core and implement
+        //    it for a Python set rewrapped with the GIL marker
+        let mut revs_pyset: HashSet<Revision> = rev_pyiter_collect(py, &revs)?;
+        inner.remove_ancestors_from(&mut revs_pyset)
+            .map_err(|e| GraphError::pynew(py, e))?;
+
+        // convert as Python list
+        let mut remaining_pyint_vec: Vec<PyObject> = Vec::with_capacity(
+            revs_pyset.len());
+        for rev in revs_pyset {
+            remaining_pyint_vec.push(rev.to_py_object(py).into_object());
+        }
+        let remaining_pylist = PyList::new(py, remaining_pyint_vec.as_slice());
+        revs.call_method(py, "intersection_update", (remaining_pylist, ), None)
+    }
+
+    def missingancestors(&self, revs: PyObject) -> PyResult<PyList> {
+        let mut inner = self.inner(py).borrow_mut();
+        let revs_vec: Vec<Revision> = rev_pyiter_collect(py, &revs)?;
+        let missing_vec = match inner.missing_ancestors(revs_vec) {
+            Ok(missing) => missing,
+            Err(e) => {
+                return Err(GraphError::pynew(py, e));
+            }
+        };
+        // convert as Python list
+        let mut missing_pyint_vec: Vec<PyObject> = Vec::with_capacity(
+            missing_vec.len());
+        for rev in missing_vec {
+            missing_pyint_vec.push(rev.to_py_object(py).into_object());
+        }
+        Ok(PyList::new(py, missing_pyint_vec.as_slice()))
+    }
+});
+
+/// Create the module, with __package__ given from parent
 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
     let dotted_name = &format!("{}.ancestor", package);
     let m = PyModule::new(py, dotted_name)?;
@@ -131,6 +226,7 @@
     )?;
     m.add_class::<AncestorsIterator>(py)?;
     m.add_class::<LazyAncestors>(py)?;
+    m.add_class::<MissingAncestors>(py)?;
 
     let sys = PyModule::import(py, "sys")?;
     let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
--- a/tests/test-rust-ancestor.py	Wed Jan 09 17:31:36 2019 +0100
+++ b/tests/test-rust-ancestor.py	Fri Nov 30 20:05:34 2018 +0100
@@ -11,7 +11,8 @@
     # this would fail already without appropriate ancestor.__package__
     from mercurial.rustext.ancestor import (
         AncestorsIterator,
-        LazyAncestors
+        LazyAncestors,
+        MissingAncestors,
     )
 
 try:
@@ -105,6 +106,22 @@
         # let's check bool for an empty one
         self.assertFalse(LazyAncestors(idx, [0], 0, False))
 
+    def testmissingancestors(self):
+        idx = self.parseindex()
+        missanc = MissingAncestors(idx, [1])
+        self.assertTrue(missanc.hasbases())
+        self.assertEqual(missanc.missingancestors([3]), [2, 3])
+        missanc.addbases({2})
+        self.assertEqual(set(missanc.bases()), {1, 2})
+        self.assertEqual(missanc.missingancestors([3]), [3])
+
+    def testmissingancestorsremove(self):
+        idx = self.parseindex()
+        missanc = MissingAncestors(idx, [1])
+        revs = {0, 1, 2, 3}
+        missanc.removeancestorsfrom(revs)
+        self.assertEqual(revs, {2, 3})
+
     def testrefcount(self):
         idx = self.parseindex()
         start_count = sys.getrefcount(idx)