changeset 42752:30320c7bf79f

rust-cpython: add macro for sharing references Following an experiment done by Georges Racinet, we now have a working way of sharing references between Python and Rust. This is needed in many points of the codebase, for example every time we need to expose an iterator to a Rust-backed Python class. In a few words, references are (unsafely) marked as `'static` and coupled with manual reference counting; we are doing manual borrow-checking. This changes introduces two declarative macro to help reduce boilerplate. While it is better than not using macros, they are not perfect. They need to: - Integrate with the garbage collector for container types (not needed as of yet), as stated in the docstring - Allow for leaking multiple attributes at the same time - Inject the `py_shared_state` data attribute in `py_class`-generated structs - Automatically namespace the functions and attributes they generate For at least the last two points, we will need to write a procedural macro instead of a declarative one. While this reference-sharing mechanism is being ironed out I thought it best not to implement it yet. Lastly, and implementation detail renders our Rust-backed Python iterators too strict to be proper drop-in replacements, as will be illustrated in a future patch: if the data structure referenced by a non-depleted iterator is mutated, an `AlreadyBorrowed` exception is raised, whereas Python would allow it, only to raise a `RuntimeError` if `next` is called on said iterator. This will have to be addressed at some point. Differential Revision: https://phab.mercurial-scm.org/D6631
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 09 Jul 2019 15:15:54 +0200
parents 4b3b27d567d5
children fce6dc93a510
files rust/hg-cpython/src/dirstate/dirs_multiset.rs rust/hg-cpython/src/exceptions.rs rust/hg-cpython/src/lib.rs rust/hg-cpython/src/ref_sharing.rs
diffstat 4 files changed, 419 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/rust/hg-cpython/src/dirstate/dirs_multiset.rs	Tue Jul 09 14:53:34 2019 +0200
+++ b/rust/hg-cpython/src/dirstate/dirs_multiset.rs	Tue Jul 09 15:15:54 2019 +0200
@@ -9,21 +9,22 @@
 //! `hg-core` package.
 
 use std::cell::RefCell;
+use std::convert::TryInto;
 
 use cpython::{
-    exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyObject, PyResult,
-    PythonObject, ToPyObject,
+    exc, ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyObject, PyResult,
+    Python,
 };
 
-use crate::dirstate::extract_dirstate;
+use crate::{dirstate::extract_dirstate, ref_sharing::PySharedState};
 use hg::{
     DirsIterable, DirsMultiset, DirstateMapError, DirstateParseError,
     EntryState,
 };
-use std::convert::TryInto;
 
 py_class!(pub class Dirs |py| {
-    data dirs_map: RefCell<DirsMultiset>;
+    data inner: RefCell<DirsMultiset>;
+    data py_shared_state: PySharedState;
 
     // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes
     // a `list`)
@@ -59,18 +60,22 @@
             )
         };
 
-        Self::create_instance(py, RefCell::new(inner))
+        Self::create_instance(
+            py,
+            RefCell::new(inner),
+            PySharedState::default()
+        )
     }
 
     def addpath(&self, path: PyObject) -> PyResult<PyObject> {
-        self.dirs_map(py).borrow_mut().add_path(
+        self.borrow_mut(py)?.add_path(
             path.extract::<PyBytes>(py)?.data(py),
         );
         Ok(py.None())
     }
 
     def delpath(&self, path: PyObject) -> PyResult<PyObject> {
-        self.dirs_map(py).borrow_mut().delete_path(
+        self.borrow_mut(py)?.delete_path(
             path.extract::<PyBytes>(py)?.data(py),
         )
             .and(Ok(py.None()))
@@ -88,26 +93,38 @@
                 }
             })
     }
-
-    // This is really inefficient on top of being ugly, but it's an easy way
-    // of having it work to continue working on the rest of the module
-    // hopefully bypassing Python entirely pretty soon.
-    def __iter__(&self) -> PyResult<PyObject> {
-        let dirs = self.dirs_map(py).borrow();
-        let dirs: Vec<_> = dirs
-            .iter()
-            .map(|d| PyBytes::new(py, d))
-            .collect();
-        dirs.to_py_object(py)
-            .into_object()
-            .iter(py)
-            .map(|o| o.into_object())
+    def __iter__(&self) -> PyResult<DirsMultisetKeysIterator> {
+        DirsMultisetKeysIterator::create_instance(
+            py,
+            RefCell::new(Some(DirsMultisetLeakedRef::new(py, &self))),
+            RefCell::new(Box::new(self.leak_immutable(py)?.iter())),
+        )
     }
 
     def __contains__(&self, item: PyObject) -> PyResult<bool> {
         Ok(self
-            .dirs_map(py)
+            .inner(py)
             .borrow()
             .contains(item.extract::<PyBytes>(py)?.data(py).as_ref()))
     }
 });
+
+py_shared_ref!(Dirs, DirsMultiset, inner, DirsMultisetLeakedRef,);
+
+impl Dirs {
+    pub fn from_inner(py: Python, d: DirsMultiset) -> PyResult<Self> {
+        Self::create_instance(py, RefCell::new(d), PySharedState::default())
+    }
+
+    fn translate_key(py: Python, res: &Vec<u8>) -> PyResult<Option<PyBytes>> {
+        Ok(Some(PyBytes::new(py, res)))
+    }
+}
+
+py_shared_sequence_iterator!(
+    DirsMultisetKeysIterator,
+    DirsMultisetLeakedRef,
+    Vec<u8>,
+    Dirs::translate_key,
+    Option<PyBytes>
+);
--- a/rust/hg-cpython/src/exceptions.rs	Tue Jul 09 14:53:34 2019 +0200
+++ b/rust/hg-cpython/src/exceptions.rs	Tue Jul 09 15:15:54 2019 +0200
@@ -67,3 +67,5 @@
         }
     }
 }
+
+py_exception!(shared_ref, AlreadyBorrowed, RuntimeError);
--- a/rust/hg-cpython/src/lib.rs	Tue Jul 09 14:53:34 2019 +0200
+++ b/rust/hg-cpython/src/lib.rs	Tue Jul 09 15:15:54 2019 +0200
@@ -27,6 +27,8 @@
 pub mod ancestors;
 mod cindex;
 mod conversion;
+#[macro_use]
+pub mod ref_sharing;
 pub mod dagops;
 pub mod dirstate;
 pub mod parsers;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-cpython/src/ref_sharing.rs	Tue Jul 09 15:15:54 2019 +0200
@@ -0,0 +1,375 @@
+// macros.rs
+//
+// Copyright 2019 Raphaël Gomès <rgomes@octobus.net>
+//
+// This software may be used and distributed according to the terms of the
+// GNU General Public License version 2 or any later version.
+
+//! Macros for use in the `hg-cpython` bridge library.
+
+use crate::exceptions::AlreadyBorrowed;
+use cpython::{PyResult, Python};
+use std::cell::{Cell, RefCell, RefMut};
+
+/// Manages the shared state between Python and Rust
+#[derive(Default)]
+pub struct PySharedState {
+    leak_count: Cell<usize>,
+    mutably_borrowed: Cell<bool>,
+}
+
+impl PySharedState {
+    pub fn borrow_mut<'a, T>(
+        &'a self,
+        py: Python<'a>,
+        pyrefmut: RefMut<'a, T>,
+    ) -> PyResult<PyRefMut<'a, T>> {
+        if self.mutably_borrowed.get() {
+            return Err(AlreadyBorrowed::new(
+                py,
+                "Cannot borrow mutably while there exists another \
+                 mutable reference in a Python object",
+            ));
+        }
+        match self.leak_count.get() {
+            0 => {
+                self.mutably_borrowed.replace(true);
+                Ok(PyRefMut::new(py, pyrefmut, self))
+            }
+            // TODO
+            // For now, this works differently than Python references
+            // in the case of iterators.
+            // Python does not complain when the data an iterator
+            // points to is modified if the iterator is never used
+            // afterwards.
+            // Here, we are stricter than this by refusing to give a
+            // mutable reference if it is already borrowed.
+            // While the additional safety might be argued for, it
+            // breaks valid programming patterns in Python and we need
+            // to fix this issue down the line.
+            _ => Err(AlreadyBorrowed::new(
+                py,
+                "Cannot borrow mutably while there are \
+                 immutable references in Python objects",
+            )),
+        }
+    }
+
+    /// Return a reference to the wrapped data with an artificial static
+    /// lifetime.
+    /// We need to be protected by the GIL for thread-safety.
+    pub fn leak_immutable<T>(
+        &self,
+        py: Python,
+        data: &RefCell<T>,
+    ) -> PyResult<&'static T> {
+        if self.mutably_borrowed.get() {
+            return Err(AlreadyBorrowed::new(
+                py,
+                "Cannot borrow immutably while there is a \
+                 mutable reference in Python objects",
+            ));
+        }
+        let ptr = data.as_ptr();
+        self.leak_count.replace(self.leak_count.get() + 1);
+        unsafe { Ok(&*ptr) }
+    }
+
+    pub fn decrease_leak_count(&self, _py: Python, mutable: bool) {
+        self.leak_count
+            .replace(self.leak_count.get().saturating_sub(1));
+        if mutable {
+            self.mutably_borrowed.replace(false);
+        }
+    }
+}
+
+/// Holds a mutable reference to data shared between Python and Rust.
+pub struct PyRefMut<'a, T> {
+    inner: RefMut<'a, T>,
+    py_shared_state: &'a PySharedState,
+}
+
+impl<'a, T> PyRefMut<'a, T> {
+    fn new(
+        _py: Python<'a>,
+        inner: RefMut<'a, T>,
+        py_shared_state: &'a PySharedState,
+    ) -> Self {
+        Self {
+            inner,
+            py_shared_state,
+        }
+    }
+}
+
+impl<'a, T> std::ops::Deref for PyRefMut<'a, T> {
+    type Target = RefMut<'a, T>;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+impl<'a, T> std::ops::DerefMut for PyRefMut<'a, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+impl<'a, T> Drop for PyRefMut<'a, T> {
+    fn drop(&mut self) {
+        let gil = Python::acquire_gil();
+        let py = gil.python();
+        self.py_shared_state.decrease_leak_count(py, true);
+    }
+}
+
+/// Allows a `py_class!` generated struct to share references to one of its
+/// data members with Python.
+///
+/// # Warning
+///
+/// The targeted `py_class!` needs to have the
+/// `data py_shared_state: PySharedState;` data attribute to compile.
+/// A better, more complicated macro is needed to automatically insert it,
+/// but this one is not yet really battle tested (what happens when
+/// multiple references are needed?). See the example below.
+///
+/// TODO allow Python container types: for now, integration with the garbage
+///     collector does not extend to Rust structs holding references to Python
+///     objects. Should the need surface, `__traverse__` and `__clear__` will
+///     need to be written as per the `rust-cpython` docs on GC integration.
+///
+/// # Parameters
+///
+/// * `$name` is the same identifier used in for `py_class!` macro call.
+/// * `$inner_struct` is the identifier of the underlying Rust struct
+/// * `$data_member` is the identifier of the data member of `$inner_struct`
+/// that will be shared.
+/// * `$leaked` is the identifier to give to the struct that will manage
+/// references to `$name`, to be used for example in other macros like
+/// `py_shared_mapping_iterator`.
+///
+/// # Example
+///
+/// ```
+/// struct MyStruct {
+///     inner: Vec<u32>;
+/// }
+///
+/// py_class!(pub class MyType |py| {
+///     data inner: RefCell<MyStruct>;
+///     data py_shared_state: PySharedState;
+/// });
+///
+/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef);
+/// ```
+macro_rules! py_shared_ref {
+    (
+        $name: ident,
+        $inner_struct: ident,
+        $data_member: ident,
+        $leaked: ident,
+    ) => {
+        impl $name {
+            fn borrow_mut<'a>(
+                &'a self,
+                py: Python<'a>,
+            ) -> PyResult<crate::ref_sharing::PyRefMut<'a, $inner_struct>>
+            {
+                self.py_shared_state(py)
+                    .borrow_mut(py, self.$data_member(py).borrow_mut())
+            }
+
+            fn leak_immutable<'a>(
+                &'a self,
+                py: Python<'a>,
+            ) -> PyResult<&'static $inner_struct> {
+                self.py_shared_state(py)
+                    .leak_immutable(py, self.$data_member(py))
+            }
+        }
+
+        /// Manage immutable references to `$name` leaked into Python
+        /// iterators.
+        ///
+        /// In truth, this does not represent leaked references themselves;
+        /// it is instead useful alongside them to manage them.
+        pub struct $leaked {
+            inner: $name,
+        }
+
+        impl $leaked {
+            fn new(py: Python, inner: &$name) -> Self {
+                Self {
+                    inner: inner.clone_ref(py),
+                }
+            }
+        }
+
+        impl Drop for $leaked {
+            fn drop(&mut self) {
+                let gil = Python::acquire_gil();
+                let py = gil.python();
+                self.inner
+                    .py_shared_state(py)
+                    .decrease_leak_count(py, false);
+            }
+        }
+    };
+}
+
+/// Defines a `py_class!` that acts as a Python iterator over a Rust iterator.
+macro_rules! py_shared_iterator_impl {
+    (
+        $name: ident,
+        $leaked: ident,
+        $iterator_type: ty,
+        $success_func: expr,
+        $success_type: ty
+    ) => {
+        py_class!(pub class $name |py| {
+            data inner: RefCell<Option<$leaked>>;
+            data it: RefCell<$iterator_type>;
+
+            def __next__(&self) -> PyResult<$success_type> {
+                let mut inner_opt = self.inner(py).borrow_mut();
+                if inner_opt.is_some() {
+                    match self.it(py).borrow_mut().next() {
+                        None => {
+                            // replace Some(inner) by None, drop $leaked
+                            inner_opt.take();
+                            Ok(None)
+                        }
+                        Some(res) => {
+                            $success_func(py, res)
+                        }
+                    }
+                } else {
+                    Ok(None)
+                }
+            }
+
+            def __iter__(&self) -> PyResult<Self> {
+                Ok(self.clone_ref(py))
+            }
+        });
+
+        impl $name {
+            pub fn from_inner(
+                py: Python,
+                leaked: Option<$leaked>,
+                it: $iterator_type
+            ) -> PyResult<Self> {
+                Self::create_instance(
+                    py,
+                    RefCell::new(leaked),
+                    RefCell::new(it)
+                )
+            }
+        }
+    };
+}
+
+/// Defines a `py_class!` that acts as a Python mapping iterator over a Rust
+/// iterator.
+///
+/// TODO: this is a bit awkward to use, and a better (more complicated)
+///     procedural macro would simplify the interface a lot.
+///
+/// # Parameters
+///
+/// * `$name` is the identifier to give to the resulting Rust struct.
+/// * `$leaked` corresponds to `$leaked` in the matching `py_shared_ref!` call.
+/// * `$key_type` is the type of the key in the mapping
+/// * `$value_type` is the type of the value in the mapping
+/// * `$success_func` is a function for processing the Rust `(key, value)`
+/// tuple on iteration success, turning it into something Python understands.
+/// * `$success_func` is the return type of `$success_func`
+///
+/// # Example
+///
+/// ```
+/// struct MyStruct {
+///     inner: HashMap<Vec<u8>, Vec<u8>>;
+/// }
+///
+/// py_class!(pub class MyType |py| {
+///     data inner: RefCell<MyStruct>;
+///     data py_shared_state: PySharedState;
+///
+///     def __iter__(&self) -> PyResult<MyTypeItemsIterator> {
+///         MyTypeItemsIterator::create_instance(
+///             py,
+///             RefCell::new(Some(MyTypeLeakedRef::new(py, &self))),
+///             RefCell::new(self.leak_immutable(py).iter()),
+///         )
+///     }
+/// });
+///
+/// impl MyType {
+///     fn translate_key_value(
+///         py: Python,
+///         res: (&Vec<u8>, &Vec<u8>),
+///     ) -> PyResult<Option<(PyBytes, PyBytes)>> {
+///         let (f, entry) = res;
+///         Ok(Some((
+///             PyBytes::new(py, f),
+///             PyBytes::new(py, entry),
+///         )))
+///     }
+/// }
+///
+/// py_shared_ref!(MyType, MyStruct, inner, MyTypeLeakedRef);
+///
+/// py_shared_mapping_iterator!(
+///     MyTypeItemsIterator,
+///     MyTypeLeakedRef,
+///     Vec<u8>,
+///     Vec<u8>,
+///     MyType::translate_key_value,
+///     Option<(PyBytes, PyBytes)>
+/// );
+/// ```
+#[allow(unused)] // Removed in a future patch
+macro_rules! py_shared_mapping_iterator {
+    (
+        $name:ident,
+        $leaked:ident,
+        $key_type: ty,
+        $value_type: ty,
+        $success_func: path,
+        $success_type: ty
+    ) => {
+        py_shared_iterator_impl!(
+            $name,
+            $leaked,
+            Box<
+                Iterator<Item = (&'static $key_type, &'static $value_type)>
+                    + Send,
+            >,
+            $success_func,
+            $success_type
+        );
+    };
+}
+
+/// Works basically the same as `py_shared_mapping_iterator`, but with only a
+/// key.
+macro_rules! py_shared_sequence_iterator {
+    (
+        $name:ident,
+        $leaked:ident,
+        $key_type: ty,
+        $success_func: path,
+        $success_type: ty
+    ) => {
+        py_shared_iterator_impl!(
+            $name,
+            $leaked,
+            Box<Iterator<Item = &'static $key_type> + Send>,
+            $success_func,
+            $success_type
+        );
+    };
+}