# HG changeset patch # User Simon Sapin # Date 1630928394 -7200 # Node ID 8f031a274cd60c6833b142f8ea274bcf8274b43f # Parent 9cd35c8c60449273adcbdf2373b8604f7775a0f1 rust: Move PyBytesWithData out of copy-tracing code So we can use it in other places to. Replace its `.data()` method with the `Deref` trait, allowing this type to be used in generic contexts. Rename the type accordingly. Differential Revision: https://phab.mercurial-scm.org/D11395 diff -r 9cd35c8c6044 -r 8f031a274cd6 rust/hg-cpython/src/copy_tracing.rs --- a/rust/hg-cpython/src/copy_tracing.rs Mon Sep 06 11:39:59 2021 +0200 +++ b/rust/hg-cpython/src/copy_tracing.rs Mon Sep 06 13:39:54 2021 +0200 @@ -13,58 +13,7 @@ use hg::copy_tracing::CombineChangesetCopies; use hg::Revision; -use self::pybytes_with_data::PyBytesWithData; - -// Module to encapsulate private fields -mod pybytes_with_data { - use cpython::{PyBytes, Python}; - - /// Safe abstraction over a `PyBytes` together with the `&[u8]` slice - /// that borrows it. - /// - /// Calling `PyBytes::data` requires a GIL marker but we want to access the - /// data in a thread that (ideally) does not need to acquire the GIL. - /// This type allows separating the call an the use. - pub(super) struct PyBytesWithData { - #[allow(unused)] - keep_alive: PyBytes, - - /// Borrows the buffer inside `self.keep_alive`, - /// but the borrow-checker cannot express self-referential structs. - data: *const [u8], - } - - fn require_send() {} - - #[allow(unused)] - fn static_assert_pybytes_is_send() { - require_send::; - } - - // Safety: PyBytes is Send. Raw pointers are not by default, - // but here sending one to another thread is fine since we ensure it stays - // valid. - unsafe impl Send for PyBytesWithData {} - - impl PyBytesWithData { - pub fn new(py: Python, bytes: PyBytes) -> Self { - Self { - data: bytes.data(py), - keep_alive: bytes, - } - } - - pub fn data(&self) -> &[u8] { - // Safety: the raw pointer is valid as long as the PyBytes is still - // alive, and the returned slice borrows `self`. - unsafe { &*self.data } - } - - pub fn unwrap(self) -> PyBytes { - self.keep_alive - } - } -} +use crate::pybytes_deref::PyBytesDeref; /// Combines copies information contained into revision `revs` to build a copy /// map. @@ -123,7 +72,7 @@ // // TODO: tweak the bound? let (rev_info_sender, rev_info_receiver) = - crossbeam_channel::bounded::>(1000); + crossbeam_channel::bounded::>(1000); // This channel (going the other way around) however is unbounded. // If they were both bounded, there might potentially be deadlocks @@ -143,7 +92,7 @@ CombineChangesetCopies::new(children_count); for (rev, p1, p2, opt_bytes) in rev_info_receiver { let files = match &opt_bytes { - Some(raw) => ChangedFiles::new(raw.data()), + Some(raw) => ChangedFiles::new(raw.as_ref()), // Python None was extracted to Option::None, // meaning there was no copy data. None => ChangedFiles::new_empty(), @@ -169,7 +118,7 @@ for rev_info in revs_info { let (rev, p1, p2, opt_bytes) = rev_info?; - let opt_bytes = opt_bytes.map(|b| PyBytesWithData::new(py, b)); + let opt_bytes = opt_bytes.map(|b| PyBytesDeref::new(py, b)); // We’d prefer to avoid the child thread calling into Python code, // but this avoids a potential deadlock on the GIL if it does: diff -r 9cd35c8c6044 -r 8f031a274cd6 rust/hg-cpython/src/lib.rs --- a/rust/hg-cpython/src/lib.rs Mon Sep 06 11:39:59 2021 +0200 +++ b/rust/hg-cpython/src/lib.rs Mon Sep 06 13:39:54 2021 +0200 @@ -36,6 +36,7 @@ pub mod discovery; pub mod exceptions; pub mod parsers; +mod pybytes_deref; pub mod revlog; pub mod utils; diff -r 9cd35c8c6044 -r 8f031a274cd6 rust/hg-cpython/src/pybytes_deref.rs --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-cpython/src/pybytes_deref.rs Mon Sep 06 13:39:54 2021 +0200 @@ -0,0 +1,53 @@ +use cpython::{PyBytes, Python}; + +/// Safe abstraction over a `PyBytes` together with the `&[u8]` slice +/// that borrows it. Implements `Deref`. +/// +/// Calling `PyBytes::data` requires a GIL marker but we want to access the +/// data in a thread that (ideally) does not need to acquire the GIL. +/// This type allows separating the call an the use. +/// +/// It also enables using a (wrapped) `PyBytes` in GIL-unaware generic code. +pub struct PyBytesDeref { + #[allow(unused)] + keep_alive: PyBytes, + + /// Borrows the buffer inside `self.keep_alive`, + /// but the borrow-checker cannot express self-referential structs. + data: *const [u8], +} + +impl PyBytesDeref { + pub fn new(py: Python, bytes: PyBytes) -> Self { + Self { + data: bytes.data(py), + keep_alive: bytes, + } + } + + pub fn unwrap(self) -> PyBytes { + self.keep_alive + } +} + +impl std::ops::Deref for PyBytesDeref { + type Target = [u8]; + + fn deref(&self) -> &[u8] { + // Safety: the raw pointer is valid as long as the PyBytes is still + // alive, and the returned slice borrows `self`. + unsafe { &*self.data } + } +} + +fn require_send() {} + +#[allow(unused)] +fn static_assert_pybytes_is_send() { + require_send::; +} + +// Safety: PyBytes is Send. Raw pointers are not by default, +// but here sending one to another thread is fine since we ensure it stays +// valid. +unsafe impl Send for PyBytesDeref {}