--- a/rust/hg-cpython/src/revlog.rs Wed Jun 19 17:03:13 2024 +0200
+++ b/rust/hg-cpython/src/revlog.rs Wed Jun 19 19:10:49 2024 +0200
@@ -4,32 +4,43 @@
//
// This software may be used and distributed according to the terms of the
// GNU General Public License version 2 or any later version.
+#![allow(non_snake_case)]
use crate::{
conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else},
+ pybytes_deref::{PyBufferDeref, PyBytesDeref},
utils::{node_from_py_bytes, node_from_py_object},
+ vfs::PyVfs,
PyRevision,
};
use cpython::{
buffer::{Element, PyBuffer},
exc::{IndexError, ValueError},
ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList,
- PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python,
+ PyModule, PyObject, PyResult, PySet, PyTuple, PyType, Python,
PythonObject, ToPyObject, UnsafePyLeaked,
};
use hg::{
errors::HgError,
- index::{
- IndexHeader, Phase, RevisionDataParams, SnapshotsCache,
- INDEX_ENTRY_SIZE,
+ index::{Phase, RevisionDataParams, SnapshotsCache, INDEX_ENTRY_SIZE},
+ nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
+ revlog::compression::CompressionConfig,
+ revlog::inner_revlog::InnerRevlog as CoreInnerRevlog,
+ revlog::inner_revlog::RevisionBuffer,
+ revlog::options::{
+ RevlogDataConfig, RevlogDeltaConfig, RevlogFeatureConfig,
+ RevlogOpenOptions,
},
- nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree},
revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex},
- BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION,
+ transaction::Transaction,
+ utils::files::{get_bytes_from_path, get_path_from_bytes},
+ BaseRevision, Node, Revision, RevlogType, UncheckedRevision,
+ NULL_REVISION,
};
use std::{
- cell::RefCell,
+ cell::{Cell, RefCell},
collections::{HashMap, HashSet},
+ sync::OnceLock,
};
use vcsgraph::graph::Graph as VCSGraph;
@@ -41,12 +52,13 @@
/// Return a Struct implementing the Graph trait
pub(crate) fn py_rust_index_to_graph(
py: Python,
- index: PyObject,
+ index_proxy: PyObject,
) -> PyResult<UnsafePyLeaked<PySharedIndex>> {
- let midx = index.extract::<Index>(py)?;
- let leaked = midx.index(py).leak_immutable();
+ let inner_revlog = index_proxy.getattr(py, "inner")?;
+ let inner_revlog = inner_revlog.extract::<InnerRevlog>(py)?;
+ let leaked = inner_revlog.inner(py).leak_immutable();
// Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked`
- Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) })
+ Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: &idx.index }) })
}
impl Clone for PySharedIndex {
@@ -91,398 +103,6 @@
}
}
-py_class!(pub class Index |py| {
- @shared data index: hg::index::Index;
- data nt: RefCell<Option<CoreNodeTree>>;
- data docket: RefCell<Option<PyObject>>;
- // Holds a reference to the mmap'ed persistent nodemap data
- data nodemap_mmap: RefCell<Option<PyBuffer>>;
- // Holds a reference to the mmap'ed persistent index data
- data index_mmap: RefCell<Option<PyBuffer>>;
- data head_revs_py_list: RefCell<Option<PyList>>;
- data head_node_ids_py_list: RefCell<Option<PyList>>;
-
- def __new__(
- _cls,
- data: PyObject,
- default_header: u32,
- ) -> PyResult<Self> {
- Self::new(py, data, default_header)
- }
-
- /// Compatibility layer used for Python consumers needing access to the C index
- ///
- /// Only use case so far is `scmutil.shortesthexnodeidprefix`,
- /// that may need to build a custom `nodetree`, based on a specified revset.
- /// With a Rust implementation of the nodemap, we will be able to get rid of
- /// this, by exposing our own standalone nodemap class,
- /// ready to accept `Index`.
-/* def get_cindex(&self) -> PyResult<PyObject> {
- Ok(self.cindex(py).borrow().inner().clone_ref(py))
- }
-*/
- // Index API involving nodemap, as defined in mercurial/pure/parsers.py
-
- /// Return Revision if found, raises a bare `error.RevlogError`
- /// in case of ambiguity, same as C version does
- def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
- let opt = self.get_nodetree(py)?.borrow();
- let nt = opt.as_ref().unwrap();
- let ridx = &*self.index(py).borrow();
- let node = node_from_py_bytes(py, &node)?;
- let rust_rev =
- nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
- Ok(rust_rev.map(Into::into))
-
- }
-
- /// same as `get_rev()` but raises a bare `error.RevlogError` if node
- /// is not found.
- ///
- /// No need to repeat `node` in the exception, `mercurial/revlog.py`
- /// will catch and rewrap with it
- def rev(&self, node: PyBytes) -> PyResult<PyRevision> {
- self.get_rev(py, node)?.ok_or_else(|| revlog_error(py))
- }
-
- /// return True if the node exist in the index
- def has_node(&self, node: PyBytes) -> PyResult<bool> {
- // TODO OPTIM we could avoid a needless conversion here,
- // to do when scaffolding for pure Rust switch is removed,
- // as `get_rev()` currently does the necessary assertions
- self.get_rev(py, node).map(|opt| opt.is_some())
- }
-
- /// find length of shortest hex nodeid of a binary ID
- def shortest(&self, node: PyBytes) -> PyResult<usize> {
- let opt = self.get_nodetree(py)?.borrow();
- let nt = opt.as_ref().unwrap();
- let idx = &*self.index(py).borrow();
- match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
- {
- Ok(Some(l)) => Ok(l),
- Ok(None) => Err(revlog_error(py)),
- Err(e) => Err(nodemap_error(py, e)),
- }
- }
-
- def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> {
- let opt = self.get_nodetree(py)?.borrow();
- let nt = opt.as_ref().unwrap();
- let idx = &*self.index(py).borrow();
-
- let node_as_string = if cfg!(feature = "python3-sys") {
- node.cast_as::<PyString>(py)?.to_string(py)?.to_string()
- }
- else {
- let node = node.extract::<PyBytes>(py)?;
- String::from_utf8_lossy(node.data(py)).to_string()
- };
-
- let prefix = NodePrefix::from_hex(&node_as_string)
- .map_err(|_| PyErr::new::<ValueError, _>(
- py, format!("Invalid node or prefix '{}'", node_as_string))
- )?;
-
- nt.find_bin(idx, prefix)
- // TODO make an inner API returning the node directly
- .map(|opt| opt.map(
- |rev| PyBytes::new(py, idx.node(rev).unwrap().as_bytes())))
- .map_err(|e| nodemap_error(py, e))
-
- }
-
- /// append an index entry
- def append(&self, tup: PyTuple) -> PyResult<PyObject> {
- if tup.len(py) < 8 {
- // this is better than the panic promised by tup.get_item()
- return Err(
- PyErr::new::<IndexError, _>(py, "tuple index out of range"))
- }
- let node_bytes = tup.get_item(py, 7).extract(py)?;
- let node = node_from_py_object(py, &node_bytes)?;
-
- let rev = self.len(py)? as BaseRevision;
-
- // This is ok since we will just add the revision to the index
- let rev = Revision(rev);
- self.index(py)
- .borrow_mut()
- .append(py_tuple_to_revision_data_params(py, tup)?)
- .unwrap();
- let idx = &*self.index(py).borrow();
- self.get_nodetree(py)?.borrow_mut().as_mut().unwrap()
- .insert(idx, &node, rev)
- .map_err(|e| nodemap_error(py, e))?;
- Ok(py.None())
- }
-
- def __delitem__(&self, key: PyObject) -> PyResult<()> {
- // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
- let start = if let Ok(rev) = key.extract(py) {
- UncheckedRevision(rev)
- } else {
- let start = key.getattr(py, "start")?;
- UncheckedRevision(start.extract(py)?)
- };
- let start = self.index(py)
- .borrow()
- .check_revision(start)
- .ok_or_else(|| {
- nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
- })?;
- self.index(py).borrow_mut().remove(start).unwrap();
- let mut opt = self.get_nodetree(py)?.borrow_mut();
- let nt = opt.as_mut().unwrap();
- nt.invalidate_all();
- self.fill_nodemap(py, nt)?;
- Ok(())
- }
-
- //
- // Index methods previously reforwarded to C index (tp_methods)
- // Same ordering as in revlog.c
- //
-
- /// return the gca set of the given revs
- def ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
- let rust_res = self.inner_ancestors(py, args)?;
- Ok(rust_res)
- }
-
- /// return the heads of the common ancestors of the given revs
- def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> {
- let rust_res = self.inner_commonancestorsheads(py, args)?;
- Ok(rust_res)
- }
-
- /// Clear the index caches and inner py_class data.
- /// It is Python's responsibility to call `update_nodemap_data` again.
- def clearcaches(&self) -> PyResult<PyObject> {
- self.nt(py).borrow_mut().take();
- self.docket(py).borrow_mut().take();
- self.nodemap_mmap(py).borrow_mut().take();
- self.head_revs_py_list(py).borrow_mut().take();
- self.head_node_ids_py_list(py).borrow_mut().take();
- self.index(py).borrow().clear_caches();
- Ok(py.None())
- }
-
- /// return the raw binary string representing a revision
- def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
- let rindex = self.index(py).borrow();
- let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
- let rust_bytes = rindex.check_revision(rev).and_then(
- |r| rindex.entry_binary(r))
- .ok_or_else(|| rev_not_in_index(py, rev))?;
- let rust_res = PyBytes::new(py, rust_bytes).into_object();
- Ok(rust_res)
- }
-
- /// return a binary packed version of the header
- def pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
- let rindex = self.index(py).borrow();
- let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
- let rust_res = PyBytes::new(py, &packed).into_object();
- Ok(rust_res)
- }
-
- /// compute phases
- def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> {
- let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
- let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
- Ok(rust_res)
- }
-
- /// reachableroots
- def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
- let rust_res = self.inner_reachableroots2(
- py,
- UncheckedRevision(args.get_item(py, 0).extract(py)?),
- args.get_item(py, 1),
- args.get_item(py, 2),
- args.get_item(py, 3).extract(py)?,
- )?;
- Ok(rust_res)
- }
-
- /// get head revisions
- def headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
- let (filtered_revs, stop_rev) = match &args.len(py) {
- 0 => Ok((py.None(), py.None())),
- 1 => Ok((args.get_item(py, 0), py.None())),
- 2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
- _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
- }?;
- self.inner_headrevs(py, &filtered_revs, &stop_rev)
- }
-
- /// get head nodeids
- def head_node_ids(&self) -> PyResult<PyObject> {
- let rust_res = self.inner_head_node_ids(py)?;
- Ok(rust_res)
- }
-
- /// get diff in head revisions
- def headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
- let rust_res = self.inner_headrevsdiff(
- py,
- &args.get_item(py, 0),
- &args.get_item(py, 1))?;
- Ok(rust_res)
- }
-
- /// True if the object is a snapshot
- def issnapshot(&self, *args, **_kw) -> PyResult<bool> {
- let index = self.index(py).borrow();
- let result = index
- .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?))
- .map_err(|e| {
- PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
- })?;
- Ok(result)
- }
-
- /// Gather snapshot data in a cache dict
- def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
- let index = self.index(py).borrow();
- let cache: PyDict = args.get_item(py, 0).extract(py)?;
- // this methods operates by setting new values in the cache,
- // hence we will compare results by letting the C implementation
- // operate over a deepcopy of the cache, and finally compare both
- // caches.
- let c_cache = PyDict::new(py);
- for (k, v) in cache.items(py) {
- c_cache.set_item(py, k, PySet::new(py, v)?)?;
- }
-
- let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
- let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
- let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
- index.find_snapshots(
- start_rev,
- end_rev,
- &mut cache_wrapper,
- ).map_err(|_| revlog_error(py))?;
- Ok(py.None())
- }
-
- /// determine revisions with deltas to reconstruct fulltext
- def deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
- let index = self.index(py).borrow();
- let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
- let stop_rev =
- args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
- let rev = index.check_revision(rev).ok_or_else(|| {
- nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
- })?;
- let stop_rev = if let Some(stop_rev) = stop_rev {
- let stop_rev = UncheckedRevision(stop_rev);
- Some(index.check_revision(stop_rev).ok_or_else(|| {
- nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
- })?)
- } else {None};
- let using_general_delta = args.get_item(py, 2)
- .extract::<Option<u32>>(py)?
- .map(|i| i != 0);
- let (chain, stopped) = index.delta_chain(
- rev, stop_rev, using_general_delta
- ).map_err(|e| {
- PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
- })?;
-
- let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
- Ok(
- PyTuple::new(
- py,
- &[
- chain.into_py_object(py).into_object(),
- stopped.into_py_object(py).into_object()
- ]
- ).into_object()
- )
-
- }
-
- /// slice planned chunk read to reach a density threshold
- def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
- let rust_res = self.inner_slicechunktodensity(
- py,
- args.get_item(py, 0),
- args.get_item(py, 1).extract(py)?,
- args.get_item(py, 2).extract(py)?
- )?;
- Ok(rust_res)
- }
-
- // index_sequence_methods and index_mapping_methods.
- //
- // Since we call back through the high level Python API,
- // there's no point making a distinction between index_get
- // and index_getitem.
- // gracinet 2023: this above is no longer true for the pure Rust impl
-
- def __len__(&self) -> PyResult<usize> {
- self.len(py)
- }
-
- def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
- let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
- Ok(rust_res)
- }
-
- def __contains__(&self, item: PyObject) -> PyResult<bool> {
- // ObjectProtocol does not seem to provide contains(), so
- // this is an equivalent implementation of the index_contains()
- // defined in revlog.c
- match item.extract::<i32>(py) {
- Ok(rev) => {
- Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
- }
- Err(_) => {
- let item_bytes: PyBytes = item.extract(py)?;
- let rust_res = self.has_node(py, item_bytes)?;
- Ok(rust_res)
- }
- }
- }
-
- def nodemap_data_all(&self) -> PyResult<PyBytes> {
- self.inner_nodemap_data_all(py)
- }
-
- def nodemap_data_incremental(&self) -> PyResult<PyObject> {
- self.inner_nodemap_data_incremental(py)
- }
- def update_nodemap_data(
- &self,
- docket: PyObject,
- nm_data: PyObject
- ) -> PyResult<PyObject> {
- self.inner_update_nodemap_data(py, docket, nm_data)
- }
-
- @property
- def entry_size(&self) -> PyResult<PyInt> {
- let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
- Ok(rust_res)
- }
-
- @property
- def rust_ext_compat(&self) -> PyResult<PyInt> {
- // will be entirely removed when the Rust index yet useful to
- // implement in Rust to detangle things when removing `self.cindex`
- let rust_res: PyInt = 1.to_py_object(py);
- Ok(rust_res)
- }
-
- @property
- def is_rust(&self) -> PyResult<PyBool> {
- Ok(false.to_py_object(py))
- }
-
-});
-
/// Take a (potentially) mmap'ed buffer, and return the underlying Python
/// buffer along with the Rust slice into said buffer. We need to keep the
/// Python buffer around, otherwise we'd get a dangling pointer once the buffer
@@ -538,7 +158,7 @@
.extract::<PyBytes>(py)?
.data(py)
.try_into()
- .unwrap();
+ .expect("nodeid should be set");
let flags = (offset_or_flags & 0xFFFF) as u16;
let data_offset = offset_or_flags >> 16;
Ok(RevisionDataParams {
@@ -622,35 +242,1168 @@
}
}
-impl Index {
- fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> {
- // Safety: we keep the buffer around inside the class as `index_mmap`
- let (buf, bytes) = unsafe { mmap_keeparound(py, data)? };
+// Rust does not support generic statics, so we cannot write one cache
+// parameterized over the config type. Instead, manually declare all three
+// caches and pass the relevant one to `with_filelog_cache`.
+static DELTA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDeltaConfig)> =
+ OnceLock::new();
+static DATA_CONFIG_CACHE: OnceLock<(PyObject, RevlogDataConfig)> =
+ OnceLock::new();
+static FEATURE_CONFIG_CACHE: OnceLock<(PyObject, RevlogFeatureConfig)> =
+ OnceLock::new();
+
+/// Cache the first conversion from Python -> Rust config for all filelogs to
+/// save on conversion time when called in a loop.
+fn with_filelog_cache<T: Copy>(
+ py: Python,
+ py_config: &PyObject,
+ revlog_type: RevlogType,
+ cache: &OnceLock<(PyObject, T)>,
+ callback: impl Fn() -> PyResult<T>,
+) -> PyResult<T> {
+ let mut was_cached = false;
+ if revlog_type == RevlogType::Filelog {
+ if let Some((cached_py_config, rust_config)) = cache.get() {
+ was_cached = true;
+ // All filelogs in a given repository *most likely* have the
+ // exact same config, but it's not impossible that some extensions
+ // do some magic with configs or that this code will be used
+ // for longer-running processes. So compare the source `PyObject`
+ // in case the source changed, at the cost of some overhead.
+ // We can't use `py_config.eq(cached_py_config)` because all config
+ // objects are different in Python and `a is b` is false.
+ if py_config.compare(py, cached_py_config)?.is_eq() {
+ return Ok(*rust_config);
+ }
+ }
+ }
+ let config = callback()?;
+ // Avoid touching the lock again if the cache has already been set.
+ if !was_cached && revlog_type == RevlogType::Filelog {
+ cache.set((py_config.clone_ref(py), config)).ok();
+ }
+ Ok(config)
+}
+
+fn extract_delta_config(
+ py: Python,
+ py_config: PyObject,
+ revlog_type: RevlogType,
+) -> PyResult<RevlogDeltaConfig> {
+ let get_delta_config = || {
+ let max_deltachain_span = py_config
+ .getattr(py, "max_deltachain_span")?
+ .extract::<i64>(py)?;
+
+ let revlog_delta_config = RevlogDeltaConfig {
+ general_delta: py_config
+ .getattr(py, "general_delta")?
+ .extract(py)?,
+ sparse_revlog: py_config
+ .getattr(py, "sparse_revlog")?
+ .extract(py)?,
+ max_chain_len: py_config
+ .getattr(py, "max_chain_len")?
+ .extract(py)?,
+ max_deltachain_span: if max_deltachain_span < 0 {
+ None
+ } else {
+ Some(max_deltachain_span as u64)
+ },
+ upper_bound_comp: py_config
+ .getattr(py, "upper_bound_comp")?
+ .extract(py)?,
+ delta_both_parents: py_config
+ .getattr(py, "delta_both_parents")?
+ .extract(py)?,
+ candidate_group_chunk_size: py_config
+ .getattr(py, "candidate_group_chunk_size")?
+ .extract(py)?,
+ debug_delta: py_config.getattr(py, "debug_delta")?.extract(py)?,
+ lazy_delta: py_config.getattr(py, "lazy_delta")?.extract(py)?,
+ lazy_delta_base: py_config
+ .getattr(py, "lazy_delta_base")?
+ .extract(py)?,
+ };
+ Ok(revlog_delta_config)
+ };
+ with_filelog_cache(
+ py,
+ &py_config,
+ revlog_type,
+ &DELTA_CONFIG_CACHE,
+ get_delta_config,
+ )
+}
+
+fn extract_data_config(
+ py: Python,
+ py_config: PyObject,
+ revlog_type: RevlogType,
+) -> PyResult<RevlogDataConfig> {
+ let get_data_config = || {
+ Ok(RevlogDataConfig {
+ try_pending: py_config.getattr(py, "try_pending")?.extract(py)?,
+ try_split: py_config.getattr(py, "try_split")?.extract(py)?,
+ check_ambig: py_config.getattr(py, "check_ambig")?.extract(py)?,
+ mmap_large_index: py_config
+ .getattr(py, "mmap_large_index")?
+ .extract(py)?,
+ mmap_index_threshold: py_config
+ .getattr(py, "mmap_index_threshold")?
+ .extract(py)?,
+ chunk_cache_size: py_config
+ .getattr(py, "chunk_cache_size")?
+ .extract(py)?,
+ uncompressed_cache_factor: py_config
+ .getattr(py, "uncompressed_cache_factor")?
+ .extract(py)?,
+ uncompressed_cache_count: py_config
+ .getattr(py, "uncompressed_cache_count")?
+ .extract(py)?,
+ with_sparse_read: py_config
+ .getattr(py, "with_sparse_read")?
+ .extract(py)?,
+ sr_density_threshold: py_config
+ .getattr(py, "sr_density_threshold")?
+ .extract(py)?,
+ sr_min_gap_size: py_config
+ .getattr(py, "sr_min_gap_size")?
+ .extract(py)?,
+ general_delta: py_config
+ .getattr(py, "generaldelta")?
+ .extract(py)?,
+ })
+ };
+
+ with_filelog_cache(
+ py,
+ &py_config,
+ revlog_type,
+ &DATA_CONFIG_CACHE,
+ get_data_config,
+ )
+}
- Self::create_instance(
+fn extract_feature_config(
+ py: Python,
+ py_config: PyObject,
+ revlog_type: RevlogType,
+) -> PyResult<RevlogFeatureConfig> {
+ let get_feature_config = || {
+ let engine_bytes = &py_config
+ .getattr(py, "compression_engine")?
+ .extract::<PyBytes>(py)?;
+ let compression_engine = engine_bytes.data(py);
+ let compression_engine = match compression_engine {
+ b"zlib" => {
+ let compression_options = &py_config
+ .getattr(py, "compression_engine_options")?
+ .extract::<PyDict>(py)?;
+ let zlib_level = compression_options
+ .get_item(py, PyBytes::new(py, &b"zlib.level"[..]));
+ let level = if let Some(level) = zlib_level {
+ if level.is_none(py) {
+ None
+ } else {
+ Some(level.extract(py)?)
+ }
+ } else {
+ None
+ };
+ let mut engine = CompressionConfig::default();
+ if let Some(level) = level {
+ engine
+ .set_level(level)
+ .expect("invalid compression level from Python");
+ }
+ engine
+ }
+ b"zstd" => {
+ let compression_options = &py_config
+ .getattr(py, "compression_engine_options")?
+ .extract::<PyDict>(py)?;
+ let zstd_level = compression_options
+ .get_item(py, PyBytes::new(py, &b"zstd.level"[..]));
+ let level = if let Some(level) = zstd_level {
+ if level.is_none(py) {
+ None
+ } else {
+ Some(level.extract(py)?)
+ }
+ } else {
+ let level = compression_options
+ .get_item(py, PyBytes::new(py, &b"level"[..]));
+ if let Some(level) = level {
+ if level.is_none(py) {
+ None
+ } else {
+ Some(level.extract(py)?)
+ }
+ } else {
+ None
+ }
+ };
+ CompressionConfig::zstd(level)
+ .expect("invalid compression level from Python")
+ }
+ b"none" => CompressionConfig::None,
+ e => {
+ return Err(PyErr::new::<ValueError, _>(
+ py,
+ format!(
+ "invalid compression engine {}",
+ String::from_utf8_lossy(e)
+ ),
+ ))
+ }
+ };
+ let revlog_feature_config = RevlogFeatureConfig {
+ compression_engine,
+ censorable: py_config.getattr(py, "censorable")?.extract(py)?,
+ has_side_data: py_config
+ .getattr(py, "has_side_data")?
+ .extract(py)?,
+ compute_rank: py_config
+ .getattr(py, "compute_rank")?
+ .extract(py)?,
+ canonical_parent_order: py_config
+ .getattr(py, "canonical_parent_order")?
+ .extract(py)?,
+ enable_ellipsis: py_config
+ .getattr(py, "enable_ellipsis")?
+ .extract(py)?,
+ };
+ Ok(revlog_feature_config)
+ };
+ with_filelog_cache(
+ py,
+ &py_config,
+ revlog_type,
+ &FEATURE_CONFIG_CACHE,
+ get_feature_config,
+ )
+}
+
+fn revlog_error_from_msg(py: Python, e: impl ToString) -> PyErr {
+ let msg = e.to_string();
+
+ match py
+ .import("mercurial.error")
+ .and_then(|m| m.get(py, "RevlogError"))
+ {
+ Err(e) => e,
+ Ok(cls) => {
+ let msg = PyBytes::new(py, msg.as_bytes());
+ PyErr::from_instance(
+ py,
+ cls.call(py, (msg,), None).ok().into_py_object(py),
+ )
+ }
+ }
+}
+
+py_class!(pub class ReadingContextManager |py| {
+ data inner_revlog: RefCell<InnerRevlog>;
+
+ def __enter__(&self) -> PyResult<PyObject> {
+ let res = self.inner_revlog(py)
+ .borrow()
+ .inner(py)
+ .borrow()
+ .enter_reading_context()
+ .map_err(|e| revlog_error_from_msg(py, e));
+ if let Err(e) = res {
+ // `__exit__` is not called from Python if `__enter__` fails
+ self.inner_revlog(py)
+ .borrow()
+ .inner(py)
+ .borrow()
+ .exit_reading_context();
+ return Err(e)
+ }
+ Ok(py.None())
+ }
+
+ def __exit__(
+ &self,
+ ty: Option<PyType>,
+ value: PyObject,
+ traceback: PyObject
+ ) -> PyResult<PyObject> {
+ // unused arguments, keep clippy from complaining without adding
+ // a general rule
+ let _ = ty;
+ let _ = value;
+ let _ = traceback;
+
+ self.inner_revlog(py)
+ .borrow()
+ .inner(py)
+ .borrow()
+ .exit_reading_context();
+ Ok(py.None())
+ }
+});
+
+// Only used from Python *tests*
+py_class!(pub class PyFileHandle |py| {
+ data inner_file: RefCell<std::os::fd::RawFd>;
+
+ def tell(&self) -> PyResult<PyObject> {
+ let locals = PyDict::new(py);
+ locals.set_item(py, "os", py.import("os")?)?;
+ locals.set_item(py, "fd", *self.inner_file(py).borrow())?;
+ let f = py.eval("os.fdopen(fd)", None, Some(&locals))?;
+
+ // Prevent Python from closing the file after garbage collecting.
+ // This is fine since Rust is still holding on to the actual File.
+ // (and also because it's only used in tests).
+ std::mem::forget(f.clone_ref(py));
+
+ locals.set_item(py, "f", f)?;
+ let res = py.eval("f.tell()", None, Some(&locals))?;
+ Ok(res)
+ }
+});
+
+/// Wrapper around a Python transaction object, to keep `hg-core` oblivious
+/// of the fact it's being called from Python.
+pub struct PyTransaction {
+ inner: PyObject,
+}
+
+impl PyTransaction {
+ pub fn new(inner: PyObject) -> Self {
+ Self { inner }
+ }
+}
+
+impl Clone for PyTransaction {
+ fn clone(&self) -> Self {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ Self {
+ inner: self.inner.clone_ref(py),
+ }
+ }
+}
+
+impl Transaction for PyTransaction {
+ fn add(&mut self, file: impl AsRef<std::path::Path>, offset: usize) {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ let file = PyBytes::new(py, &get_bytes_from_path(file.as_ref()));
+ self.inner
+ .call_method(py, "add", (file, offset), None)
+ .expect("transaction add failed");
+ }
+}
+
+py_class!(pub class WritingContextManager |py| {
+ data inner_revlog: RefCell<InnerRevlog>;
+ data transaction: RefCell<PyTransaction>;
+ data data_end: Cell<Option<usize>>;
+
+ def __enter__(&self) -> PyResult<PyObject> {
+ let res = self.inner_revlog(py)
+ .borrow_mut()
+ .inner(py)
+ .borrow_mut()
+ .enter_writing_context(
+ self.data_end(py).get(),
+ &mut *self.transaction(py).borrow_mut()
+ ).map_err(|e| revlog_error_from_msg(py, e));
+ if let Err(e) = res {
+ // `__exit__` is not called from Python if `__enter__` fails
+ self.inner_revlog(py)
+ .borrow_mut()
+ .inner(py)
+ .borrow_mut()
+ .exit_writing_context();
+ return Err(e)
+ }
+ Ok(py.None())
+ }
+
+ def __exit__(
+ &self,
+ ty: Option<PyType>,
+ value: PyObject,
+ traceback: PyObject
+ ) -> PyResult<PyObject> {
+ // unused arguments, keep clippy from complaining without adding
+ // a general rule
+ let _ = ty;
+ let _ = value;
+ let _ = traceback;
+
+ self.inner_revlog(py)
+ .borrow_mut()
+ .inner(py)
+ .borrow_mut()
+ .exit_writing_context();
+ Ok(py.None())
+ }
+});
+
+py_class!(pub class InnerRevlog |py| {
+ @shared data inner: CoreInnerRevlog;
+ data nt: RefCell<Option<CoreNodeTree>>;
+ data docket: RefCell<Option<PyObject>>;
+ // Holds a reference to the mmap'ed persistent nodemap data
+ data nodemap_mmap: RefCell<Option<PyBuffer>>;
+ // Holds a reference to the mmap'ed persistent index data
+ data index_mmap: RefCell<PyBuffer>;
+ data head_revs_py_list: RefCell<Option<PyList>>;
+ data head_node_ids_py_list: RefCell<Option<PyList>>;
+ data revision_cache: RefCell<Option<PyObject>>;
+
+ def __new__(
+ _cls,
+ opener: PyObject,
+ index_data: PyObject,
+ index_file: PyObject,
+ data_file: PyObject,
+ sidedata_file: PyObject,
+ inline: bool,
+ data_config: PyObject,
+ delta_config: PyObject,
+ feature_config: PyObject,
+ chunk_cache: PyObject,
+ default_compression_header: PyObject,
+ revlog_type: usize,
+ ) -> PyResult<Self> {
+ Self::inner_new(
py,
- hg::index::Index::new(
- bytes,
- IndexHeader::parse(&header.to_be_bytes())
- .expect("default header is broken"),
+ opener,
+ index_data,
+ index_file,
+ data_file,
+ sidedata_file,
+ inline,
+ data_config,
+ delta_config,
+ feature_config,
+ chunk_cache,
+ default_compression_header,
+ revlog_type
+ )
+ }
+
+ def clear_cache(&self) -> PyResult<PyObject> {
+ assert!(!self.is_delaying(py)?);
+ self.revision_cache(py).borrow_mut().take();
+ self.inner(py).borrow_mut().clear_cache();
+ Ok(py.None())
+ }
+
+ @property def canonical_index_file(&self) -> PyResult<PyBytes> {
+ let path = self.inner(py).borrow().canonical_index_file();
+ Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+ }
+
+ @property def is_delaying(&self) -> PyResult<bool> {
+ Ok(self.inner(py).borrow().is_delaying())
+ }
+
+ @property def _revisioncache(&self) -> PyResult<PyObject> {
+ let cache = &*self.revision_cache(py).borrow();
+ match cache {
+ None => Ok(py.None()),
+ Some(cache) => {
+ Ok(cache.clone_ref(py))
+ }
+ }
+
+ }
+
+ @property def _writinghandles(&self) -> PyResult<PyObject> {
+ use std::os::fd::AsRawFd;
+
+ let inner = self.inner(py).borrow();
+ let handles = inner.python_writing_handles();
+
+ match handles.as_ref() {
+ None => Ok(py.None()),
+ Some(handles) => {
+ let d_handle = if let Some(d_handle) = &handles.data_handle {
+ let handle = RefCell::new(d_handle.file.as_raw_fd());
+ Some(PyFileHandle::create_instance(py, handle)?)
+ } else {
+ None
+ };
+ let handle =
+ RefCell::new(handles.index_handle.file.as_raw_fd());
+ Ok(
+ (
+ PyFileHandle::create_instance(py, handle)?,
+ d_handle,
+ py.None(), // Sidedata handle
+
+ ).to_py_object(py).into_object()
+ )
+ }
+ }
+
+ }
+
+ @_revisioncache.setter def set_revision_cache(
+ &self,
+ value: Option<PyObject>
+ ) -> PyResult<()> {
+ *self.revision_cache(py).borrow_mut() = value.clone_ref(py);
+ match value {
+ None => {
+ // This means the property has been deleted, *not* that the
+ // property has been set to `None`. Whatever happens is up
+ // to the implementation. Here we just set it to `None`.
+ self
+ .inner(py)
+ .borrow()
+ .last_revision_cache
+ .lock()
+ .expect("lock should not be held")
+ .take();
+ },
+ Some(tuple) => {
+ if tuple.is_none(py) {
+ self
+ .inner(py)
+ .borrow()
+ .last_revision_cache
+ .lock()
+ .expect("lock should not be held")
+ .take();
+ return Ok(())
+ }
+ let node = tuple.get_item(py, 0)?.extract::<PyBytes>(py)?;
+ let node = node_from_py_bytes(py, &node)?;
+ let rev = tuple.get_item(py, 1)?.extract::<BaseRevision>(py)?;
+ // Ok because Python only sets this if the revision has been
+ // checked
+ let rev = Revision(rev);
+ let data = tuple.get_item(py, 2)?.extract::<PyBytes>(py)?;
+ let inner = self.inner(py).borrow();
+ let mut last_revision_cache = inner
+ .last_revision_cache
+ .lock()
+ .expect("lock should not be held");
+ *last_revision_cache =
+ Some((node, rev, Box::new(PyBytesDeref::new(py, data))));
+ }
+ }
+ Ok(())
+ }
+
+ @property def inline(&self) -> PyResult<bool> {
+ Ok(self.inner(py).borrow().is_inline())
+ }
+
+ @inline.setter def set_inline(
+ &self,
+ value: Option<PyObject>
+ ) -> PyResult<()> {
+ if let Some(v) = value {
+ self.inner(py).borrow_mut().inline = v.extract(py)?;
+ };
+ Ok(())
+ }
+
+ @property def index_file(&self) -> PyResult<PyBytes> {
+ Ok(
+ PyBytes::new(
+ py,
+ &get_bytes_from_path(&self.inner(py).borrow().index_file)
)
- .map_err(|e| {
- revlog_error_with_msg(py, e.to_string().as_bytes())
- })?,
- RefCell::new(None),
- RefCell::new(None),
- RefCell::new(None),
- RefCell::new(Some(buf)),
- RefCell::new(None),
- RefCell::new(None),
)
}
+ @index_file.setter def set_index_file(
+ &self,
+ value: Option<PyObject>
+ ) -> PyResult<()> {
+ let path = get_path_from_bytes(
+ value
+ .expect("don't delete the index path")
+ .extract::<PyBytes>(py)?
+ .data(py)
+ ).to_owned();
+ self.inner(py).borrow_mut().index_file = path;
+ Ok(())
+ }
+
+ @property def is_writing(&self) -> PyResult<bool> {
+ Ok(self.inner(py).borrow().is_writing())
+ }
+
+ @property def is_open(&self) -> PyResult<bool> {
+ Ok(self.inner(py).borrow().is_open())
+ }
+
+ def issnapshot(&self, rev: PyRevision) -> PyResult<bool> {
+ self.inner_issnapshot(py, UncheckedRevision(rev.0))
+ }
+
+ def _deltachain(&self, *args, **kw) -> PyResult<PyObject> {
+ let inner = self.inner(py).borrow();
+ let general_delta = inner.index.uses_generaldelta();
+ let args = PyTuple::new(
+ py,
+ &[
+ args.get_item(py, 0),
+ kw.and_then(|d| d.get_item(py, "stoprev")).to_py_object(py),
+ general_delta.to_py_object(py).into_object(),
+ ]
+ );
+ self._index_deltachain(py, &args, kw)
+ }
+
+ def compress(&self, data: PyObject) -> PyResult<PyTuple> {
+ let inner = self.inner(py).borrow();
+ let py_buffer = PyBuffer::get(py, &data)?;
+ let deref = PyBufferDeref::new(py, py_buffer)?;
+ let compressed = inner.compress(&deref)
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ let compressed = compressed.as_deref();
+ let header = if compressed.is_some() {
+ PyBytes::new(py, &b""[..])
+ } else {
+ PyBytes::new(py, &b"u"[..])
+ };
+ Ok(
+ (
+ header,
+ PyBytes::new(py, compressed.unwrap_or(&deref))
+ ).to_py_object(py)
+ )
+ }
+
+ def reading(&self) -> PyResult<ReadingContextManager> {
+ ReadingContextManager::create_instance(
+ py,
+ RefCell::new(self.clone_ref(py)),
+ )
+ }
+
+ def writing(
+ &self,
+ transaction: PyObject,
+ data_end: Option<usize>,
+ sidedata_end: Option<usize>,
+ ) -> PyResult<WritingContextManager> {
+ // Silence unused argument (only relevant for changelog v2)
+ let _ = sidedata_end;
+ WritingContextManager::create_instance(
+ py,
+ RefCell::new(self.clone_ref(py)),
+ RefCell::new(PyTransaction::new(transaction)),
+ Cell::new(data_end)
+ )
+ }
+
+ def split_inline(
+ &self,
+ _tr: PyObject,
+ header: i32,
+ new_index_file_path: Option<PyObject>
+ ) -> PyResult<PyBytes> {
+ let mut inner = self.inner(py).borrow_mut();
+ let new_index_file_path = match new_index_file_path {
+ Some(path) => {
+ let path = path.extract::<PyBytes>(py)?;
+ Some(get_path_from_bytes(path.data(py)).to_owned())
+ },
+ None => None,
+ };
+ let header = hg::index::IndexHeader::parse(&header.to_be_bytes());
+ let header = header.expect("invalid header bytes");
+ let path = inner
+ .split_inline(header, new_index_file_path)
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+ }
+
+ def get_segment_for_revs(
+ &self,
+ startrev: PyRevision,
+ endrev: PyRevision,
+ ) -> PyResult<PyTuple> {
+ let inner = self.inner(py).borrow();
+ let (offset, data) = inner
+ .get_segment_for_revs(Revision(startrev.0), Revision(endrev.0))
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ let data = PyBytes::new(py, &data);
+ Ok((offset, data).to_py_object(py))
+ }
+
+ def raw_text(
+ &self,
+ _node: PyObject,
+ rev: PyRevision
+ ) -> PyResult<PyBytes> {
+ let inner = self.inner(py).borrow();
+ let mut py_bytes = PyBytes::new(py, &[]);
+ inner
+ .raw_text(Revision(rev.0), |size, f| {
+ py_bytes = with_pybytes_buffer(py, size, f)?;
+ Ok(())
+ }).map_err(|e| revlog_error_from_msg(py, e))?;
+ Ok(py_bytes)
+ }
+
+ def _chunk(
+ &self,
+ rev: PyRevision,
+ ) -> PyResult<PyBytes> {
+ let inner = self.inner(py).borrow();
+ let chunk = inner
+ .chunk_for_rev(Revision(rev.0))
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ let chunk = PyBytes::new(py, &chunk);
+ Ok(chunk)
+ }
+
+ def write_entry(
+ &self,
+ transaction: PyObject,
+ entry: PyObject,
+ data: PyTuple,
+ _link: PyObject,
+ offset: usize,
+ _sidedata: PyObject,
+ _sidedata_offset: PyInt,
+ index_end: Option<u64>,
+ data_end: Option<u64>,
+ _sidedata_end: Option<PyInt>,
+ ) -> PyResult<PyTuple> {
+ let mut inner = self.inner(py).borrow_mut();
+ let transaction = PyTransaction::new(transaction);
+ let py_bytes = entry.extract(py)?;
+ let entry = PyBytesDeref::new(py, py_bytes);
+ let header = data.get_item(py, 0).extract::<PyBytes>(py)?;
+ let header = header.data(py);
+ let data = data.get_item(py, 1);
+ let py_bytes = data.extract(py)?;
+ let data = PyBytesDeref::new(py, py_bytes);
+ Ok(
+ inner.write_entry(
+ transaction,
+ &entry,
+ (header, &data),
+ offset,
+ index_end,
+ data_end
+ ).map_err(|e| revlog_error_from_msg(py, e))?
+ .to_py_object(py)
+ )
+ }
+
+ def delay(&self) -> PyResult<Option<PyBytes>> {
+ let path = self.inner(py)
+ .borrow_mut()
+ .delay()
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ Ok(path.map(|p| PyBytes::new(py, &get_bytes_from_path(p))))
+ }
+
+ def write_pending(&self) -> PyResult<PyTuple> {
+ let (path, any_pending) = self.inner(py)
+ .borrow_mut()
+ .write_pending()
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ let maybe_path = match path {
+ Some(path) => {
+ PyBytes::new(py, &get_bytes_from_path(path)).into_object()
+ },
+ None => {
+ py.None()
+ }
+ };
+ Ok(
+ (
+ maybe_path,
+ any_pending
+ ).to_py_object(py)
+ )
+ }
+
+ def finalize_pending(&self) -> PyResult<PyBytes> {
+ let path = self.inner(py)
+ .borrow_mut()
+ .finalize_pending()
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ Ok(PyBytes::new(py, &get_bytes_from_path(path)))
+ }
+
+ // -- forwarded index methods --
+
+ def _index_get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> {
+ let opt = self.get_nodetree(py)?.borrow();
+ let nt = opt.as_ref().expect("nodetree should be set");
+ let ridx = &self.inner(py).borrow().index;
+ let node = node_from_py_bytes(py, &node)?;
+ let rust_rev =
+ nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?;
+ Ok(rust_rev.map(Into::into))
+ }
+
+ /// same as `_index_get_rev()` but raises a bare `error.RevlogError` if node
+ /// is not found.
+ ///
+ /// No need to repeat `node` in the exception, `mercurial/revlog.py`
+ /// will catch and rewrap with it
+ def _index_rev(&self, node: PyBytes) -> PyResult<PyRevision> {
+ self._index_get_rev(py, node)?.ok_or_else(|| revlog_error(py))
+ }
+
+ /// return True if the node exist in the index
+ def _index_has_node(&self, node: PyBytes) -> PyResult<bool> {
+ // TODO OPTIM we could avoid a needless conversion here,
+ // to do when scaffolding for pure Rust switch is removed,
+ // as `_index_get_rev()` currently does the necessary assertions
+ self._index_get_rev(py, node).map(|opt| opt.is_some())
+ }
+
+ /// find length of shortest hex nodeid of a binary ID
+ def _index_shortest(&self, node: PyBytes) -> PyResult<usize> {
+ let opt = self.get_nodetree(py)?.borrow();
+ let nt = opt.as_ref().expect("nodetree should be set");
+ let idx = &self.inner(py).borrow().index;
+ match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
+ {
+ Ok(Some(l)) => Ok(l),
+ Ok(None) => Err(revlog_error(py)),
+ Err(e) => Err(nodemap_error(py, e)),
+ }
+ }
+
+ def _index_partialmatch(
+ &self,
+ node: PyObject
+ ) -> PyResult<Option<PyBytes>> {
+ let opt = self.get_nodetree(py)?.borrow();
+ let nt = opt.as_ref().expect("nodetree should be set");
+ let idx = &self.inner(py).borrow().index;
+
+ let node = node.extract::<PyBytes>(py)?;
+ let node_as_string = String::from_utf8_lossy(node.data(py));
+
+ let prefix = NodePrefix::from_hex(node_as_string.to_string())
+ .map_err(|_| PyErr::new::<ValueError, _>(
+ py, format!("Invalid node or prefix '{}'", node_as_string))
+ )?;
+
+ nt.find_bin(idx, prefix)
+ // TODO make an inner API returning the node directly
+ .map(|opt| opt.map(|rev| {
+ PyBytes::new(
+ py,
+ idx.node(rev).expect("node should exist").as_bytes()
+ )
+ }))
+ .map_err(|e| nodemap_error(py, e))
+
+ }
+
+ /// append an index entry
+ def _index_append(&self, tup: PyTuple) -> PyResult<PyObject> {
+ if tup.len(py) < 8 {
+ // this is better than the panic promised by tup.get_item()
+ return Err(
+ PyErr::new::<IndexError, _>(py, "tuple index out of range"))
+ }
+ let node_bytes = tup.get_item(py, 7).extract(py)?;
+ let node = node_from_py_object(py, &node_bytes)?;
+
+ let rev = self.len(py)? as BaseRevision;
+
+ // This is ok since we will just add the revision to the index
+ let rev = Revision(rev);
+ self.inner(py)
+ .borrow_mut()
+ .index
+ .append(py_tuple_to_revision_data_params(py, tup)?)
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ let idx = &self.inner(py).borrow().index;
+ self.get_nodetree(py)?
+ .borrow_mut()
+ .as_mut()
+ .expect("nodetree should be set")
+ .insert(idx, &node, rev)
+ .map_err(|e| nodemap_error(py, e))?;
+ Ok(py.None())
+ }
+
+ def _index___delitem__(&self, key: PyObject) -> PyResult<PyObject> {
+ // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]`
+ let start = if let Ok(rev) = key.extract(py) {
+ UncheckedRevision(rev)
+ } else {
+ let start = key.getattr(py, "start")?;
+ UncheckedRevision(start.extract(py)?)
+ };
+ let mut borrow = self.inner(py).borrow_mut();
+ let start = borrow
+ .index
+ .check_revision(start)
+ .ok_or_else(|| {
+ nodemap_error(py, NodeMapError::RevisionNotInIndex(start))
+ })?;
+ borrow.index
+ .remove(start)
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ drop(borrow);
+ let mut opt = self.get_nodetree(py)?.borrow_mut();
+ let nt = opt.as_mut().expect("nodetree should be set");
+ nt.invalidate_all();
+ self.fill_nodemap(py, nt)?;
+ Ok(py.None())
+ }
+
+ /// return the gca set of the given revs
+ def _index_ancestors(&self, *args, **_kw) -> PyResult<PyObject> {
+ let rust_res = self.inner_ancestors(py, args)?;
+ Ok(rust_res)
+ }
+
+ /// return the heads of the common ancestors of the given revs
+ def _index_commonancestorsheads(
+ &self,
+ *args,
+ **_kw
+ ) -> PyResult<PyObject> {
+ let rust_res = self.inner_commonancestorsheads(py, args)?;
+ Ok(rust_res)
+ }
+
+ /// Clear the index caches and inner py_class data.
+ /// It is Python's responsibility to call `update_nodemap_data` again.
+ def _index_clearcaches(&self) -> PyResult<PyObject> {
+ self.nt(py).borrow_mut().take();
+ self.docket(py).borrow_mut().take();
+ self.nodemap_mmap(py).borrow_mut().take();
+ self.head_revs_py_list(py).borrow_mut().take();
+ self.head_node_ids_py_list(py).borrow_mut().take();
+ self.inner(py).borrow_mut().index.clear_caches();
+ Ok(py.None())
+ }
+
+ /// return the raw binary string representing a revision
+ def _index_entry_binary(&self, *args, **_kw) -> PyResult<PyObject> {
+ let rindex = &self.inner(py).borrow().index;
+ let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+ let rust_bytes = rindex.check_revision(rev).and_then(
+ |r| rindex.entry_binary(r)).ok_or_else(|| rev_not_in_index(py, rev)
+ )?;
+ let rust_res = PyBytes::new(py, rust_bytes).into_object();
+ Ok(rust_res)
+ }
+
+
+ /// return a binary packed version of the header
+ def _index_pack_header(&self, *args, **_kw) -> PyResult<PyObject> {
+ let rindex = &self.inner(py).borrow().index;
+ let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?);
+ let rust_res = PyBytes::new(py, &packed).into_object();
+ Ok(rust_res)
+ }
+
+ /// compute phases
+ def _index_computephasesmapsets(
+ &self,
+ *args,
+ **_kw
+ ) -> PyResult<PyObject> {
+ let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?;
+ let rust_res = self.inner_computephasesmapsets(py, py_roots)?;
+ Ok(rust_res)
+ }
+
+ /// reachableroots
+ def _index_reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> {
+ let rust_res = self.inner_reachableroots2(
+ py,
+ UncheckedRevision(args.get_item(py, 0).extract(py)?),
+ args.get_item(py, 1),
+ args.get_item(py, 2),
+ args.get_item(py, 3).extract(py)?,
+ )?;
+ Ok(rust_res)
+ }
+
+ /// get head revisions
+ def _index_headrevs(&self, *args, **_kw) -> PyResult<PyObject> {
+ let (filtered_revs, stop_rev) = match &args.len(py) {
+ 0 => Ok((py.None(), py.None())),
+ 1 => Ok((args.get_item(py, 0), py.None())),
+ 2 => Ok((args.get_item(py, 0), args.get_item(py, 1))),
+ _ => Err(PyErr::new::<cpython::exc::TypeError, _>(py, "too many arguments")),
+ }?;
+ self.inner_headrevs(py, &filtered_revs, &stop_rev)
+ }
+
+ /// get head nodeids
+ def _index_head_node_ids(&self) -> PyResult<PyObject> {
+ let rust_res = self.inner_head_node_ids(py)?;
+ Ok(rust_res)
+ }
+
+ /// get diff in head revisions
+ def _index_headrevsdiff(&self, *args, **_kw) -> PyResult<PyObject> {
+ let rust_res = self.inner_headrevsdiff(
+ py,
+ &args.get_item(py, 0),
+ &args.get_item(py, 1))?;
+ Ok(rust_res)
+ }
+
+ /// True if the object is a snapshot
+ def _index_issnapshot(&self, *args, **_kw) -> PyResult<bool> {
+ let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?);
+ self.inner_issnapshot(py, rev)
+ }
+
+ /// Gather snapshot data in a cache dict
+ def _index_findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> {
+ let index = &self.inner(py).borrow().index;
+ let cache: PyDict = args.get_item(py, 0).extract(py)?;
+        // this method operates by setting new values in the cache,
+ // hence we will compare results by letting the C implementation
+ // operate over a deepcopy of the cache, and finally compare both
+ // caches.
+ let c_cache = PyDict::new(py);
+ for (k, v) in cache.items(py) {
+ c_cache.set_item(py, k, PySet::new(py, v)?)?;
+ }
+
+ let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?);
+ let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?);
+ let mut cache_wrapper = PySnapshotsCache{ py, dict: cache };
+ index.find_snapshots(
+ start_rev,
+ end_rev,
+ &mut cache_wrapper,
+ ).map_err(|_| revlog_error(py))?;
+ Ok(py.None())
+ }
+
+ /// determine revisions with deltas to reconstruct fulltext
+ def _index_deltachain(&self, *args, **_kw) -> PyResult<PyObject> {
+ let index = &self.inner(py).borrow().index;
+ let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into();
+ let stop_rev =
+ args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?;
+ let rev = index.check_revision(rev).ok_or_else(|| {
+ nodemap_error(py, NodeMapError::RevisionNotInIndex(rev))
+ })?;
+ let stop_rev = if let Some(stop_rev) = stop_rev {
+ let stop_rev = UncheckedRevision(stop_rev);
+ Some(index.check_revision(stop_rev).ok_or_else(|| {
+ nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev))
+ })?)
+ } else {None};
+ let using_general_delta = args.get_item(py, 2)
+ .extract::<Option<u32>>(py)?
+ .map(|i| i != 0);
+ let (chain, stopped) = index.delta_chain(
+ rev, stop_rev, using_general_delta
+ ).map_err(|e| {
+ PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+ })?;
+
+ let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect();
+ Ok(
+ PyTuple::new(
+ py,
+ &[
+ chain.into_py_object(py).into_object(),
+ stopped.into_py_object(py).into_object()
+ ]
+ ).into_object()
+ )
+ }
+
+ /// slice planned chunk read to reach a density threshold
+ def _index_slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> {
+ let rust_res = self.inner_slicechunktodensity(
+ py,
+ args.get_item(py, 0),
+ args.get_item(py, 1).extract(py)?,
+ args.get_item(py, 2).extract(py)?
+ )?;
+ Ok(rust_res)
+ }
+
+ def _index___len__(&self) -> PyResult<usize> {
+ self.len(py)
+ }
+
+ def _index___getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+ let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
+ Ok(rust_res)
+ }
+
+ def _index___contains__(&self, item: PyObject) -> PyResult<bool> {
+ // ObjectProtocol does not seem to provide contains(), so
+ // this is an equivalent implementation of the index_contains()
+ // defined in revlog.c
+ match item.extract::<i32>(py) {
+ Ok(rev) => {
+ Ok(rev >= -1 && rev < self.len(py)? as BaseRevision)
+ }
+ Err(_) => {
+ let item_bytes: PyBytes = item.extract(py)?;
+ let rust_res = self._index_has_node(py, item_bytes)?;
+ Ok(rust_res)
+ }
+ }
+ }
+
+ def _index_nodemap_data_all(&self) -> PyResult<PyBytes> {
+ self.inner_nodemap_data_all(py)
+ }
+
+ def _index_nodemap_data_incremental(&self) -> PyResult<PyObject> {
+ self.inner_nodemap_data_incremental(py)
+ }
+
+ def _index_update_nodemap_data(
+ &self,
+ docket: PyObject,
+ nm_data: PyObject
+ ) -> PyResult<PyObject> {
+ self.inner_update_nodemap_data(py, docket, nm_data)
+ }
+
+ @property
+ def _index_entry_size(&self) -> PyResult<PyInt> {
+ let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py);
+ Ok(rust_res)
+ }
+
+ @property
+ def _index_rust_ext_compat(&self) -> PyResult<PyInt> {
+        // will be entirely removed once the Rust index takes over, but is
+        // still useful to detangle things while removing `self.cindex`
+ let rust_res: PyInt = 1.to_py_object(py);
+ Ok(rust_res)
+ }
+
+ @property
+ def _index_is_rust(&self) -> PyResult<PyBool> {
+ Ok(false.to_py_object(py))
+ }
+
+
+});
+
+/// Forwarded index methods
+impl InnerRevlog {
fn len(&self, py: Python) -> PyResult<usize> {
- let rust_index_len = self.index(py).borrow().len();
+ let rust_index_len = self.inner(py).borrow().index.len();
Ok(rust_index_len)
}
-
/// This is scaffolding at this point, but it could also become
/// a way to start a persistent nodemap or perform a
/// vacuum / repack operation
@@ -659,11 +1412,11 @@
py: Python,
nt: &mut CoreNodeTree,
) -> PyResult<PyObject> {
- let index = self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
for r in 0..self.len(py)? {
let rev = Revision(r as BaseRevision);
// in this case node() won't ever return None
- nt.insert(&*index, index.node(rev).unwrap(), rev)
+ nt.insert(index, index.node(rev).expect("node should exist"), rev)
.map_err(|e| nodemap_error(py, e))?
}
Ok(py.None())
@@ -684,7 +1437,11 @@
/// Returns the full nodemap bytes to be written as-is to disk
fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> {
- let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+ let nodemap = self
+ .get_nodetree(py)?
+ .borrow_mut()
+ .take()
+ .expect("nodetree should exist");
let (readonly, bytes) = nodemap.into_readonly_and_added_bytes();
// If there's anything readonly, we need to build the data again from
@@ -717,7 +1474,11 @@
None => return Ok(py.None()),
};
- let node_tree = self.get_nodetree(py)?.borrow_mut().take().unwrap();
+ let node_tree = self
+ .get_nodetree(py)?
+ .borrow_mut()
+ .take()
+ .expect("nodetree should exist");
let masked_blocks = node_tree.masked_readonly_blocks();
let (_, data) = node_tree.into_readonly_and_added_bytes();
let changed = masked_blocks * std::mem::size_of::<Block>();
@@ -747,7 +1508,7 @@
.extract::<BaseRevision>(py)?
.into();
self.docket(py).borrow_mut().replace(docket.clone_ref(py));
- let idx = self.index(py).borrow();
+ let idx = &self.inner(py).borrow().index;
let data_tip = idx.check_revision(data_tip).ok_or_else(|| {
nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip))
})?;
@@ -756,7 +1517,7 @@
for r in (data_tip.0 + 1)..current_tip as BaseRevision {
let rev = Revision(r);
// in this case node() won't ever return None
- nt.insert(&*idx, idx.node(rev).unwrap(), rev)
+ nt.insert(idx, idx.node(rev).expect("node should exist"), rev)
.map_err(|e| nodemap_error(py, e))?
}
@@ -766,7 +1527,7 @@
}
fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
- let idx = self.index(py).borrow();
+ let idx = &self.inner(py).borrow().index;
Ok(match key.extract::<BaseRevision>(py) {
Ok(key_as_int) => {
let entry_params = if key_as_int == NULL_REVISION.0 {
@@ -786,15 +1547,17 @@
revision_data_params_to_py_tuple(py, entry_params)
.into_object()
}
- _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
- || py.None(),
- |py_rev| py_rev.into_py_object(py).into_object(),
- ),
+ _ => self
+ ._index_get_rev(py, key.extract::<PyBytes>(py)?)?
+ .map_or_else(
+ || py.None(),
+ |py_rev| py_rev.into_py_object(py).into_object(),
+ ),
})
}
fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> {
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
// We don't use the shortcut here, as it's actually slower to loop
// through the cached `PyList` than to re-do the whole computation for
@@ -826,7 +1589,7 @@
filtered_revs: &PyObject,
stop_rev: &PyObject,
) -> PyResult<PyObject> {
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let stop_rev = if stop_rev.is_none(py) {
None
} else {
@@ -899,7 +1662,7 @@
) -> PyResult<PyObject> {
let begin = begin.extract::<BaseRevision>(py)?;
let end = end.extract::<BaseRevision>(py)?;
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let begin =
Self::check_revision(index, UncheckedRevision(begin - 1), py)?;
let end = Self::check_revision(index, UncheckedRevision(end - 1), py)?;
@@ -918,7 +1681,7 @@
new_heads: &[Revision],
py: Python<'_>,
) -> PyList {
- let index = self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let as_vec: Vec<PyObject> = new_heads
.iter()
.map(|r| {
@@ -958,7 +1721,7 @@
py: Python,
py_revs: &PyTuple,
) -> PyResult<PyObject> {
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
let as_vec: Vec<_> = index
.ancestors(&revs)
@@ -974,7 +1737,7 @@
py: Python,
py_revs: &PyTuple,
) -> PyResult<PyObject> {
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?;
let as_vec: Vec<_> = index
.common_ancestor_heads(&revs)
@@ -990,7 +1753,7 @@
py: Python,
py_roots: PyDict,
) -> PyResult<PyObject> {
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
.items_list(py)
.iter(py)
@@ -1037,7 +1800,7 @@
target_density: f64,
min_gap_size: usize,
) -> PyResult<PyObject> {
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?;
let as_nested_vec =
index.slice_chunk_to_density(&revs, target_density, min_gap_size);
@@ -1069,7 +1832,7 @@
roots: PyObject,
include_path: bool,
) -> PyResult<PyObject> {
- let index = &*self.index(py).borrow();
+ let index = &self.inner(py).borrow().index;
let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
PyErr::new::<IndexError, _>(py, "head out of range")
})?;
@@ -1091,6 +1854,84 @@
.collect();
Ok(PyList::new(py, &as_vec).into_object())
}
+ fn inner_issnapshot(
+ &self,
+ py: Python,
+ rev: UncheckedRevision,
+ ) -> PyResult<bool> {
+ let inner = &self.inner(py).borrow();
+ let index = &self.inner(py).borrow().index;
+ let rev = index
+ .check_revision(rev)
+ .ok_or_else(|| rev_not_in_index(py, rev))?;
+ let result = inner.is_snapshot(rev).map_err(|e| {
+ PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string())
+ })?;
+ Ok(result)
+ }
+}
+
+impl InnerRevlog {
+ pub fn inner_new(
+ py: Python,
+ opener: PyObject,
+ index_data: PyObject,
+ index_file: PyObject,
+ data_file: PyObject,
+ _sidedata_file: PyObject,
+ inline: bool,
+ data_config: PyObject,
+ delta_config: PyObject,
+ feature_config: PyObject,
+ _chunk_cache: PyObject,
+ _default_compression_header: PyObject,
+ revlog_type: usize,
+ ) -> PyResult<Self> {
+ let vfs = Box::new(PyVfs::new(py, opener)?);
+ let index_file =
+ get_path_from_bytes(index_file.extract::<PyBytes>(py)?.data(py))
+ .to_owned();
+ let data_file =
+ get_path_from_bytes(data_file.extract::<PyBytes>(py)?.data(py))
+ .to_owned();
+ let revlog_type = RevlogType::try_from(revlog_type)
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ let data_config = extract_data_config(py, data_config, revlog_type)?;
+ let delta_config =
+ extract_delta_config(py, delta_config, revlog_type)?;
+ let feature_config =
+ extract_feature_config(py, feature_config, revlog_type)?;
+ let options = RevlogOpenOptions::new(
+ inline,
+ data_config,
+ delta_config,
+ feature_config,
+ );
+ // Safety: we keep the buffer around inside the class as `index_mmap`
+ let (buf, bytes) = unsafe { mmap_keeparound(py, index_data)? };
+ let index = hg::index::Index::new(bytes, options.index_header())
+ .map_err(|e| revlog_error_from_msg(py, e))?;
+ let core = CoreInnerRevlog::new(
+ vfs,
+ index,
+ index_file,
+ data_file,
+ data_config,
+ delta_config,
+ feature_config,
+ );
+ Self::create_instance(
+ py,
+ core,
+ RefCell::new(None),
+ RefCell::new(None),
+ RefCell::new(None),
+ RefCell::new(buf),
+ RefCell::new(None),
+ RefCell::new(None),
+ RefCell::new(None),
+ )
+ }
}
py_class!(pub class NodeTree |py| {
@@ -1111,7 +1952,7 @@
/// (generation-based guard), same as iterating on a `dict` that has
/// been meanwhile mutated.
def is_invalidated(&self) -> PyResult<bool> {
- let leaked = self.index(py).borrow();
+ let leaked = &self.index(py).borrow();
// Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
let result = unsafe { leaked.try_borrow(py) };
// two cases for result to be an error:
@@ -1123,7 +1964,7 @@
}
def insert(&self, rev: PyRevision) -> PyResult<PyObject> {
- let leaked = self.index(py).borrow();
+ let leaked = &self.index(py).borrow();
// Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
let index = &*unsafe { leaked.try_borrow(py)? };
@@ -1135,7 +1976,7 @@
return Err(rev_not_in_index(py, rev.into()))
}
- let entry = index.inner.get_entry(rev).unwrap();
+ let entry = index.inner.get_entry(rev).expect("entry should exist");
let mut nt = self.nt(py).borrow_mut();
nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?;
@@ -1158,7 +1999,7 @@
)?;
let nt = self.nt(py).borrow();
- let leaked = self.index(py).borrow();
+ let leaked = &self.index(py).borrow();
// Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
let index = &*unsafe { leaked.try_borrow(py)? };
@@ -1170,7 +2011,7 @@
def shortest(&self, node: PyBytes) -> PyResult<usize> {
let nt = self.nt(py).borrow();
- let leaked = self.index(py).borrow();
+ let leaked = &self.index(py).borrow();
// Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
let idx = &*unsafe { leaked.try_borrow(py)? };
match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?)
@@ -1182,6 +2023,120 @@
}
});
+fn panic_after_error(_py: Python) -> ! {
+ unsafe {
+ python3_sys::PyErr_Print();
+ }
+ panic!("Python API called failed");
+}
+
+/// # Safety
+///
+/// Don't call this. Its only caller is taken from `PyO3`.
+unsafe fn cast_from_owned_ptr_or_panic<T>(
+ py: Python,
+ p: *mut python3_sys::PyObject,
+) -> T
+where
+ T: cpython::PythonObjectWithCheckedDowncast,
+{
+ if p.is_null() {
+ panic_after_error(py);
+ } else {
+ PyObject::from_owned_ptr(py, p).cast_into(py).unwrap()
+ }
+}
+
+fn with_pybytes_buffer<F>(
+ py: Python,
+ len: usize,
+ init: F,
+) -> Result<PyBytes, RevlogError>
+where
+ F: FnOnce(
+ &mut dyn RevisionBuffer<Target = PyBytes>,
+ ) -> Result<(), RevlogError>,
+{
+ // Largely inspired by code in PyO3
+ // https://pyo3.rs/main/doc/pyo3/types/struct.pybytes#method.new_bound_with
+ unsafe {
+ let pyptr = python3_sys::PyBytes_FromStringAndSize(
+ std::ptr::null(),
+ len as python3_sys::Py_ssize_t,
+ );
+ let pybytes = cast_from_owned_ptr_or_panic::<PyBytes>(py, pyptr);
+ let buffer: *mut u8 = python3_sys::PyBytes_AsString(pyptr).cast();
+ debug_assert!(!buffer.is_null());
+ let mut rev_buf = PyRevisionBuffer::new(pybytes, buffer, len);
+ // Initialise the bytestring in init
+ // If init returns an Err, the buffer is deallocated by `pybytes`
+ init(&mut rev_buf).map(|_| rev_buf.finish())
+ }
+}
+
+/// Wrapper around a Python-provided buffer into which the revision contents
+/// will be written. Done for speed in order to save a large allocation + copy.
+struct PyRevisionBuffer {
+ py_bytes: PyBytes,
+ _buf: *mut u8,
+ len: usize,
+ current_buf: *mut u8,
+ current_len: usize,
+}
+
+impl PyRevisionBuffer {
+ /// # Safety
+ ///
+ /// `buf` should be the start of the allocated bytes of `bytes`, and `len`
+ /// exactly the length of said allocated bytes.
+ #[inline]
+ unsafe fn new(bytes: PyBytes, buf: *mut u8, len: usize) -> Self {
+ Self {
+ py_bytes: bytes,
+ _buf: buf,
+ len,
+ current_len: 0,
+ current_buf: buf,
+ }
+ }
+
+ /// Number of bytes that have been copied to. Will be different to the
+ /// total allocated length of the buffer unless the revision is done being
+ /// written.
+ #[inline]
+ fn current_len(&self) -> usize {
+ self.current_len
+ }
+}
+
+impl RevisionBuffer for PyRevisionBuffer {
+ type Target = PyBytes;
+
+ #[inline]
+ fn extend_from_slice(&mut self, slice: &[u8]) {
+ assert!(self.current_len + slice.len() <= self.len);
+ unsafe {
+ // We cannot use `copy_from_nonoverlapping` since it's *possible*
+ // to create a slice from the same Python memory region using
+            // [`PyBytesDeref`]. LLVM likely has an optimization for it anyway.
+ self.current_buf.copy_from(slice.as_ptr(), slice.len());
+ self.current_buf = self.current_buf.add(slice.len());
+ }
+ self.current_len += slice.len()
+ }
+
+ #[inline]
+ fn finish(self) -> Self::Target {
+        // catch unzeroed bytes before they become undefined behavior
+ assert_eq!(
+ self.current_len(),
+ self.len,
+ "not enough bytes read for revision"
+ );
+ self.py_bytes
+ }
+}
+
fn revlog_error(py: Python) -> PyErr {
match py
.import("mercurial.error")
@@ -1195,21 +2150,6 @@
}
}
-fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr {
- match py
- .import("mercurial.error")
- .and_then(|m| m.get(py, "RevlogError"))
- {
- Err(e) => e,
- Ok(cls) => PyErr::from_instance(
- py,
- cls.call(py, (PyBytes::new(py, msg),), None)
- .ok()
- .into_py_object(py),
- ),
- }
-}
-
fn graph_error(py: Python, _err: hg::GraphError) -> PyErr {
// ParentOutOfRange is currently the only alternative
// in `hg::GraphError`. The C index always raises this simple ValueError.
@@ -1249,8 +2189,8 @@
m.add(py, "__package__", package)?;
m.add(py, "__doc__", "RevLog - Rust implementations")?;
- m.add_class::<Index>(py)?;
m.add_class::<NodeTree>(py)?;
+ m.add_class::<InnerRevlog>(py)?;
let sys = PyModule::import(py, "sys")?;
let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rust/hg-cpython/src/vfs.rs Wed Jun 19 19:10:49 2024 +0200
@@ -0,0 +1,289 @@
+use std::{
+ cell::Cell,
+ fs::File,
+ io::Error,
+ os::fd::{AsRawFd, FromRawFd},
+ path::{Path, PathBuf},
+};
+
+use cpython::{
+ ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyObject,
+ PyResult, PyTuple, Python, PythonObject, ToPyObject,
+};
+use hg::{
+ errors::{HgError, IoResultExt},
+ exit_codes,
+ utils::files::{get_bytes_from_path, get_path_from_bytes},
+ vfs::Vfs,
+};
+
+/// Wrapper around a Python VFS object to call back into Python from `hg-core`.
+pub struct PyVfs {
+ // The Python `vfs` instance; every method re-acquires the GIL and calls
+ // back into this object.
+ inner: PyObject,
+}
+
+// Manual impl rather than `#[derive(Clone)]`: `clone_ref` needs the GIL to
+// bump the Python reference count.
+impl Clone for PyVfs {
+ fn clone(&self) -> Self {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ Self {
+ inner: self.inner.clone_ref(py),
+ }
+ }
+}
+
+impl PyVfs {
+ /// Wrap an existing Python vfs object. Takes ownership of the reference;
+ /// infallible in practice (always returns `Ok`).
+ pub fn new(_py: Python, py_vfs: PyObject) -> PyResult<Self> {
+ Ok(Self { inner: py_vfs })
+ }
+
+ /// Call the Python opener and turn the result into a Rust `File`.
+ ///
+ /// The flags map to an open mode as follows (first match wins):
+ /// `atomic_temp` -> "w", `create` -> "w+", `write` -> "r+",
+ /// otherwise -> "rb" (read-only).
+ ///
+ /// Returns the open file plus, for `atomic_temp`, the temporary file
+ /// name the data is staged under until commit.
+ fn inner_open(
+ &self,
+ filename: &Path,
+ create: bool,
+ check_ambig: bool,
+ atomic_temp: bool,
+ write: bool,
+ ) -> Result<(File, Option<PathBuf>), HgError> {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ // NOTE(review): only the read-only branch passes an explicit binary
+ // flag ("rb"); confirm the Python opener treats "w"/"w+"/"r+" as
+ // binary too.
+ let mode = if atomic_temp {
+ PyBytes::new(py, b"w")
+ } else if create {
+ PyBytes::new(py, b"w+")
+ } else if write {
+ PyBytes::new(py, b"r+")
+ } else {
+ PyBytes::new(py, b"rb")
+ };
+ // The opener is expected to return a `(fileno, temp_name)` tuple.
+ let res = self.inner.call(
+ py,
+ (
+ PyBytes::new(py, &get_bytes_from_path(filename)),
+ mode,
+ atomic_temp,
+ check_ambig,
+ ),
+ None,
+ );
+ match res {
+ Ok(tup) => {
+ let tup = tup
+ .extract::<PyTuple>(py)
+ .map_err(|e| vfs_error("vfs did not return a tuple", e))?;
+ let fileno = tup.get_item(py, 0).extract(py).map_err(|e| {
+ vfs_error("vfs did not return a valid fileno", e)
+ })?;
+ let temp_name = tup.get_item(py, 1);
+ // Safety: this must be a valid owned file descriptor, and
+ // Python has just given it to us, it will only exist here now
+ let file = unsafe { File::from_raw_fd(fileno) };
+ // The temp name is only meaningful for atomic writes; it is
+ // ignored otherwise.
+ let temp_name = if atomic_temp {
+ Some(
+ get_path_from_bytes(
+ temp_name
+ .extract::<PyBytes>(py)
+ .map_err(|e| vfs_error("invalid tempname", e))?
+ .data(py),
+ )
+ .to_owned(),
+ )
+ } else {
+ None
+ };
+ Ok((file, temp_name))
+ }
+ Err(mut e) => {
+ // Translate a Python FileNotFoundError into the structured
+ // `HgError::IoError` that `hg-core` callers match on.
+ // TODO surely there is a better way of comparing
+ if e.instance(py).get_type(py).name(py) == "FileNotFoundError"
+ {
+ return Err(HgError::IoError {
+ error: Error::new(
+ std::io::ErrorKind::NotFound,
+ e.instance(py).to_string(),
+ ),
+ context: hg::errors::IoErrorContext::ReadingFile(
+ filename.to_owned(),
+ ),
+ });
+ }
+ Err(vfs_error("failed to call opener", e))
+ }
+ }
+ }
+}
+
+/// Turn an unexpected Python exception raised by a VFS callback into an
+/// `HgError::abort`, prefixed with `reason` for context.
+fn vfs_error(reason: impl Into<String>, mut error: PyErr) -> HgError {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ HgError::abort(
+ format!("{}: {}", reason.into(), error.instance(py)),
+ exit_codes::ABORT,
+ None,
+ )
+}
+
+// Minimal file-like Python object exposing only `fileno()`. Used to hand a
+// raw fd back to Python APIs (e.g. the vfs `fstat` call in `file_size`)
+// that expect an object with a `fileno()` method.
+py_class!(pub class PyFile |py| {
+ data number: Cell<i32>;
+
+ def fileno(&self) -> PyResult<PyInt> {
+ Ok(self.number(py).get().to_py_object(py))
+ }
+});
+
+/// Implements `hg-core`'s `Vfs` trait by delegating every operation to the
+/// wrapped Python vfs object (each method re-acquires the GIL).
+impl Vfs for PyVfs {
+ /// Open `filename` for reading and writing ("r+" mode).
+ fn open(&self, filename: &Path) -> Result<File, HgError> {
+ self.inner_open(filename, false, false, false, true)
+ .map(|(f, _)| f)
+ }
+ /// Open `filename` read-only ("rb" mode).
+ fn open_read(&self, filename: &Path) -> Result<File, HgError> {
+ self.inner_open(filename, false, false, false, false)
+ .map(|(f, _)| f)
+ }
+
+ /// Open for writing with the opener's `checkambig` flag set, so Python
+ /// can guard against ambiguous mtimes on close.
+ fn open_check_ambig(
+ &self,
+ filename: &Path,
+ ) -> Result<std::fs::File, HgError> {
+ self.inner_open(filename, false, true, false, true)
+ .map(|(f, _)| f)
+ }
+
+ /// Create (or truncate) `filename` for reading and writing ("w+" mode).
+ fn create(&self, filename: &Path) -> Result<std::fs::File, HgError> {
+ self.inner_open(filename, true, false, false, true)
+ .map(|(f, _)| f)
+ }
+
+ /// Create `filename` through a staging temp file that replaces the
+ /// target atomically on commit.
+ fn create_atomic(
+ &self,
+ filename: &Path,
+ check_ambig: bool,
+ ) -> Result<hg::vfs::AtomicFile, HgError> {
+ self.inner_open(filename, true, false, true, true).map(
+ |(fp, temp_name)| {
+ hg::vfs::AtomicFile::new(
+ fp,
+ check_ambig,
+ // `inner_open` always returns a temp name when
+ // `atomic_temp` is true
+ temp_name.expect("temp name should exist"),
+ filename.to_owned(),
+ )
+ },
+ )
+ }
+
+ /// Size in bytes of an open file, via the Python vfs's `fstat` so any
+ /// Python-side fd bookkeeping stays consistent.
+ fn file_size(&self, file: &File) -> Result<u64, HgError> {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ let raw_fd = file.as_raw_fd();
+ // Wrap the raw fd in a fileno()-bearing shim, since `fstat`
+ // expects a file-like object rather than a bare integer.
+ let py_fd = PyFile::create_instance(py, Cell::new(raw_fd))
+ .expect("create_instance cannot fail");
+ let fstat = self
+ .inner
+ .call_method(py, "fstat", (py_fd,), None)
+ .map_err(|e| {
+ vfs_error(format!("failed to fstat fd '{}'", raw_fd), e)
+ })?;
+ // Nested results: the outer `?` covers a failed `st_size` lookup,
+ // the inner `Result` (the method's return value) covers a
+ // non-integer `st_size`.
+ fstat
+ .getattr(py, "st_size")
+ .map(|v| {
+ v.extract(py).map_err(|e| {
+ vfs_error(format!("invalid size for fd '{}'", raw_fd), e)
+ })
+ })
+ .map_err(|e| {
+ vfs_error(format!("failed to get size of fd '{}'", raw_fd), e)
+ })?
+ }
+
+ /// Whether `filename` exists according to the Python vfs. A failed
+ /// call is treated as "does not exist"; NOTE(review): a successful call
+ /// returning a non-boolean will panic on the final `unwrap`.
+ fn exists(&self, filename: &Path) -> bool {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ self.inner
+ .call_method(
+ py,
+ "exists",
+ (PyBytes::new(py, &get_bytes_from_path(filename)),),
+ None,
+ )
+ .unwrap_or_else(|_| false.into_py_object(py).into_object())
+ .extract(py)
+ .unwrap()
+ }
+
+ /// Delete `filename` through the Python vfs.
+ fn unlink(&self, filename: &Path) -> Result<(), HgError> {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ if let Err(e) = self.inner.call_method(
+ py,
+ "unlink",
+ (PyBytes::new(py, &get_bytes_from_path(filename)),),
+ None,
+ ) {
+ return Err(vfs_error(
+ format!("failed to unlink '{}'", filename.display()),
+ e,
+ ));
+ }
+ Ok(())
+ }
+
+ /// Rename `from` to `to`, forwarding `check_ambig` as the Python
+ /// `checkambig` keyword argument.
+ fn rename(
+ &self,
+ from: &Path,
+ to: &Path,
+ check_ambig: bool,
+ ) -> Result<(), HgError> {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ let kwargs = PyDict::new(py);
+ kwargs
+ .set_item(py, "checkambig", check_ambig)
+ .map_err(|e| vfs_error("dict setitem failed", e))?;
+ if let Err(e) = self.inner.call_method(
+ py,
+ "rename",
+ (
+ PyBytes::new(py, &get_bytes_from_path(from)),
+ PyBytes::new(py, &get_bytes_from_path(to)),
+ ),
+ Some(&kwargs),
+ ) {
+ let msg = format!(
+ "failed to rename '{}' to '{}'",
+ from.display(),
+ to.display()
+ );
+ return Err(vfs_error(msg, e));
+ }
+ Ok(())
+ }
+
+ /// Copy `from` to `to`. Both paths are resolved through the Python
+ /// vfs's `join`, then the copy itself is done with `std::fs`.
+ /// NOTE(review): the `unwrap`s panic if `join` fails or returns
+ /// non-bytes — confirm that is acceptable for all callers.
+ fn copy(&self, from: &Path, to: &Path) -> Result<(), HgError> {
+ let gil = &Python::acquire_gil();
+ let py = gil.python();
+ let from = self
+ .inner
+ .call_method(
+ py,
+ "join",
+ (PyBytes::new(py, &get_bytes_from_path(from)),),
+ None,
+ )
+ .unwrap();
+ let from = from.extract::<PyBytes>(py).unwrap();
+ let from = get_path_from_bytes(from.data(py));
+ let to = self
+ .inner
+ .call_method(
+ py,
+ "join",
+ (PyBytes::new(py, &get_bytes_from_path(to)),),
+ None,
+ )
+ .unwrap();
+ let to = to.extract::<PyBytes>(py).unwrap();
+ let to = get_path_from_bytes(to.data(py));
+ std::fs::copy(from, to).when_writing_file(to)?;
+ Ok(())
+ }
+}