Mercurial > hg
changeset 47953:8f031a274cd6
rust: Move PyBytesWithData out of copy-tracing code
So we can use it in other places to.
Replace its `.data()` method with the `Deref<Target = [u8]>` trait,
allowing this type to be used in generic contexts.
Rename the type accordingly.
Differential Revision: https://phab.mercurial-scm.org/D11395
author | Simon Sapin <simon.sapin@octobus.net> |
---|---|
date | Mon, 06 Sep 2021 13:39:54 +0200 |
parents | 9cd35c8c6044 |
children | 4afd6cc447b9 |
files | rust/hg-cpython/src/copy_tracing.rs rust/hg-cpython/src/lib.rs rust/hg-cpython/src/pybytes_deref.rs |
diffstat | 3 files changed, 58 insertions(+), 55 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-cpython/src/copy_tracing.rs Mon Sep 06 11:39:59 2021 +0200 +++ b/rust/hg-cpython/src/copy_tracing.rs Mon Sep 06 13:39:54 2021 +0200 @@ -13,58 +13,7 @@ use hg::copy_tracing::CombineChangesetCopies; use hg::Revision; -use self::pybytes_with_data::PyBytesWithData; - -// Module to encapsulate private fields -mod pybytes_with_data { - use cpython::{PyBytes, Python}; - - /// Safe abstraction over a `PyBytes` together with the `&[u8]` slice - /// that borrows it. - /// - /// Calling `PyBytes::data` requires a GIL marker but we want to access the - /// data in a thread that (ideally) does not need to acquire the GIL. - /// This type allows separating the call an the use. - pub(super) struct PyBytesWithData { - #[allow(unused)] - keep_alive: PyBytes, - - /// Borrows the buffer inside `self.keep_alive`, - /// but the borrow-checker cannot express self-referential structs. - data: *const [u8], - } - - fn require_send<T: Send>() {} - - #[allow(unused)] - fn static_assert_pybytes_is_send() { - require_send::<PyBytes>; - } - - // Safety: PyBytes is Send. Raw pointers are not by default, - // but here sending one to another thread is fine since we ensure it stays - // valid. - unsafe impl Send for PyBytesWithData {} - - impl PyBytesWithData { - pub fn new(py: Python, bytes: PyBytes) -> Self { - Self { - data: bytes.data(py), - keep_alive: bytes, - } - } - - pub fn data(&self) -> &[u8] { - // Safety: the raw pointer is valid as long as the PyBytes is still - // alive, and the returned slice borrows `self`. - unsafe { &*self.data } - } - - pub fn unwrap(self) -> PyBytes { - self.keep_alive - } - } -} +use crate::pybytes_deref::PyBytesDeref; /// Combines copies information contained into revision `revs` to build a copy /// map. @@ -123,7 +72,7 @@ // // TODO: tweak the bound? let (rev_info_sender, rev_info_receiver) = - crossbeam_channel::bounded::<RevInfo<PyBytesWithData>>(1000); + crossbeam_channel::bounded::<RevInfo<PyBytesDeref>>(1000); // This channel (going the other way around) however is unbounded. // If they were both bounded, there might potentially be deadlocks @@ -143,7 +92,7 @@ CombineChangesetCopies::new(children_count); for (rev, p1, p2, opt_bytes) in rev_info_receiver { let files = match &opt_bytes { - Some(raw) => ChangedFiles::new(raw.data()), + Some(raw) => ChangedFiles::new(raw.as_ref()), // Python None was extracted to Option::None, // meaning there was no copy data. None => ChangedFiles::new_empty(), @@ -169,7 +118,7 @@ for rev_info in revs_info { let (rev, p1, p2, opt_bytes) = rev_info?; - let opt_bytes = opt_bytes.map(|b| PyBytesWithData::new(py, b)); + let opt_bytes = opt_bytes.map(|b| PyBytesDeref::new(py, b)); // We’d prefer to avoid the child thread calling into Python code, // but this avoids a potential deadlock on the GIL if it does:
--- a/rust/hg-cpython/src/lib.rs Mon Sep 06 11:39:59 2021 +0200 +++ b/rust/hg-cpython/src/lib.rs Mon Sep 06 13:39:54 2021 +0200 @@ -36,6 +36,7 @@ pub mod discovery; pub mod exceptions; pub mod parsers; +mod pybytes_deref; pub mod revlog; pub mod utils;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-cpython/src/pybytes_deref.rs Mon Sep 06 13:39:54 2021 +0200 @@ -0,0 +1,53 @@ +use cpython::{PyBytes, Python}; + +/// Safe abstraction over a `PyBytes` together with the `&[u8]` slice +/// that borrows it. Implements `Deref<Target = [u8]>`. +/// +/// Calling `PyBytes::data` requires a GIL marker but we want to access the +/// data in a thread that (ideally) does not need to acquire the GIL. +/// This type allows separating the call an the use. +/// +/// It also enables using a (wrapped) `PyBytes` in GIL-unaware generic code. +pub struct PyBytesDeref { + #[allow(unused)] + keep_alive: PyBytes, + + /// Borrows the buffer inside `self.keep_alive`, + /// but the borrow-checker cannot express self-referential structs. + data: *const [u8], +} + +impl PyBytesDeref { + pub fn new(py: Python, bytes: PyBytes) -> Self { + Self { + data: bytes.data(py), + keep_alive: bytes, + } + } + + pub fn unwrap(self) -> PyBytes { + self.keep_alive + } +} + +impl std::ops::Deref for PyBytesDeref { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + // Safety: the raw pointer is valid as long as the PyBytes is still + // alive, and the returned slice borrows `self`. + unsafe { &*self.data } + } +} + +fn require_send<T: Send>() {} + +#[allow(unused)] +fn static_assert_pybytes_is_send() { + require_send::<PyBytes>; +} + +// Safety: PyBytes is Send. Raw pointers are not by default, +// but here sending one to another thread is fine since we ensure it stays +// valid. +unsafe impl Send for PyBytesDeref {}