Mercurial > hg-stable
changeset 42544:ce94f9622acd
rust-dirstate: add "dirs" rust-cpython binding
There is an obvious performance and memory issue with these bindings on larger
repos, as they copy and allocate everything at once on every round-trip. As in
the previous patch series, this is only temporary and will improve once we no
longer have large data structures going to and from Python.
Differential Revision: https://phab.mercurial-scm.org/D6394
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 16 May 2019 18:03:42 +0200 |
parents | 2dcee6497b0b |
children | 2db96bf84a8f |
files | rust/hg-cpython/src/dirstate.rs |
diffstat | 1 files changed, 128 insertions(+), 20 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-cpython/src/dirstate.rs Thu May 16 18:03:06 2019 +0200 +++ b/rust/hg-cpython/src/dirstate.rs Thu May 16 18:03:42 2019 +0200 @@ -11,21 +11,25 @@ //! From Python, this will be seen as `mercurial.rustext.dirstate` use cpython::{ - exc, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, PyResult, - PySequence, PythonObject, PyTuple, Python, ToPyObject, + exc, ObjectProtocol, PyBytes, PyDict, PyErr, PyInt, PyModule, PyObject, + PyResult, PySequence, PyTuple, Python, PythonObject, ToPyObject, }; use hg::{ - pack_dirstate, parse_dirstate, CopyVecEntry, DirstateEntry, - DirstatePackError, DirstateParents, DirstateParseError, DirstateVec, + pack_dirstate, parse_dirstate, CopyVecEntry, DirsIterable, DirsMultiset, + DirstateEntry, DirstateMapError, DirstatePackError, DirstateParents, + DirstateParseError, DirstateVec, }; use std::collections::HashMap; use std::ffi::CStr; + #[cfg(feature = "python27")] extern crate python27_sys as python_sys; #[cfg(feature = "python3")] extern crate python3_sys as python_sys; + use self::python_sys::PyCapsule_Import; use libc::{c_char, c_int}; +use std::cell::RefCell; use std::mem::transmute; /// C code uses a custom `dirstate_tuple` type, checks in multiple instances @@ -102,20 +106,11 @@ } } -fn pack_dirstate_wrapper( +fn extract_dirstate_vec( py: Python, - dmap: PyDict, - copymap: PyDict, - pl: PyTuple, - now: PyInt, -) -> PyResult<PyBytes> { - let p1 = pl.get_item(py, 0).extract::<PyBytes>(py)?; - let p1: &[u8] = p1.data(py); - let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?; - let p2: &[u8] = p2.data(py); - - let dirstate_vec: Result<DirstateVec, PyErr> = dmap - .items(py) + dmap: &PyDict, +) -> Result<DirstateVec, PyErr> { + dmap.items(py) .iter() .map(|(filename, stats)| { let stats = stats.extract::<PySequence>(py)?; @@ -136,7 +131,22 @@ }, )) }) - .collect(); + .collect() +} + +fn pack_dirstate_wrapper( + py: Python, + dmap: PyDict, + copymap: PyDict, + pl: PyTuple, + now: PyInt, +) -> PyResult<PyBytes> { + let p1 = 
pl.get_item(py, 0).extract::<PyBytes>(py)?; + let p1: &[u8] = p1.data(py); + let p2 = pl.get_item(py, 1).extract::<PyBytes>(py)?; + let p2: &[u8] = p2.data(py); + + let dirstate_vec = extract_dirstate_vec(py, &dmap)?; let copies: Result<HashMap<Vec<u8>, Vec<u8>>, PyErr> = copymap .items(py) @@ -150,7 +160,7 @@ .collect(); match pack_dirstate( - &dirstate_vec?, + &dirstate_vec, &copies?, DirstateParents { p1, p2 }, now.as_object().extract::<i32>(py)?, @@ -170,7 +180,10 @@ py, PyBytes::new(py, &filename[..]), decapsule_make_dirstate_tuple(py)?( - state as c_char, mode, size, mtime, + state as c_char, + mode, + size, + mtime, ), )?; } @@ -191,10 +204,103 @@ } } +py_class!(pub class Dirs |py| { + data dirs_map: RefCell<DirsMultiset>; + + // `map` is either a `dict` or a flat iterator (usually a `set`, sometimes + // a `list`) + def __new__( + _cls, + map: PyObject, + skip: Option<PyObject> = None + ) -> PyResult<Self> { + let mut skip_state: Option<i8> = None; + if let Some(skip) = skip { + skip_state = Some(skip.extract::<PyBytes>(py)?.data(py)[0] as i8); + } + let dirs_map; + + if let Ok(map) = map.cast_as::<PyDict>(py) { + let dirstate_vec = extract_dirstate_vec(py, &map)?; + dirs_map = DirsMultiset::new( + DirsIterable::Dirstate(dirstate_vec), + skip_state, + ) + } else { + let map: Result<Vec<Vec<u8>>, PyErr> = map + .iter(py)? 
+ .map(|o| Ok(o?.extract::<PyBytes>(py)?.data(py).to_owned())) + .collect(); + dirs_map = DirsMultiset::new( + DirsIterable::Manifest(map?), + skip_state, + ) + } + + Self::create_instance(py, RefCell::new(dirs_map)) + } + + def addpath(&self, path: PyObject) -> PyResult<PyObject> { + self.dirs_map(py).borrow_mut().add_path( + path.extract::<PyBytes>(py)?.data(py), + ); + Ok(py.None()) + } + + def delpath(&self, path: PyObject) -> PyResult<PyObject> { + self.dirs_map(py).borrow_mut().delete_path( + path.extract::<PyBytes>(py)?.data(py), + ) + .and(Ok(py.None())) + .or_else(|e| { + match e { + DirstateMapError::PathNotFound(_p) => { + Err(PyErr::new::<exc::ValueError, _>( + py, + "expected a value, found none".to_string(), + )) + } + DirstateMapError::EmptyPath => { + Ok(py.None()) + } + } + }) + } + + // This is really inefficient on top of being ugly, but it's an easy way + // of having it work to continue working on the rest of the module + // hopefully bypassing Python entirely pretty soon. 
+ def __iter__(&self) -> PyResult<PyObject> { + let dict = PyDict::new(py); + + for (key, value) in self.dirs_map(py).borrow().iter() { + dict.set_item( + py, + PyBytes::new(py, &key[..]), + value.to_py_object(py), + )?; + } + + let locals = PyDict::new(py); + locals.set_item(py, "obj", dict)?; + + py.eval("iter(obj)", None, Some(&locals)) + } + + def __contains__(&self, item: PyObject) -> PyResult<bool> { + Ok(self + .dirs_map(py) + .borrow() + .get(&item.extract::<PyBytes>(py)?.data(py).to_owned()) + .is_some()) + } +}); + /// Create the module, with `__package__` given from parent pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> { let dotted_name = &format!("{}.dirstate", package); let m = PyModule::new(py, dotted_name)?; + m.add(py, "__package__", package)?; m.add(py, "__doc__", "Dirstate - Rust implementation")?; m.add( @@ -219,6 +325,8 @@ ), )?; + m.add_class::<Dirs>(py)?; + let sys = PyModule::import(py, "sys")?; let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?; sys_modules.set_item(py, dotted_name, &m)?;