Mercurial > hg-stable
changeset 51226:002b49905aac
rust-index: implementation of __getitem__
Although the removed panic tends to prove if the full test suite
did pass that the case when the input is a node id does not happen,
it is best not to remove it right now.
Raising IndexError is crucial for iteration on the index to stop,
given the default CPython sequence iterator, see for instance
https://github.com/zpoint/CPython-Internals/blobs/master/BasicObject/iter/iter.md
This was spotted by `test-rust-ancestors.py`, which does simple interations on
indexes (as preflight checks).
In `revlog.c`, `index_getitem` defaults to `index_get` when called
on revision numbers, which does raise `IndexError` with the same message as
the one we are introducing here.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Thu, 02 Nov 2023 11:19:54 +0100 |
parents | 297fa956b6c4 |
children | 952e3cd7568f |
files | rust/hg-core/src/revlog/index.rs rust/hg-cpython/src/revlog.rs |
diffstat | 2 files changed, 134 insertions(+), 15 deletions(-) [+] |
line wrap: on
line diff
--- a/rust/hg-core/src/revlog/index.rs Wed Sep 27 11:34:52 2023 +0200 +++ b/rust/hg-core/src/revlog/index.rs Thu Nov 02 11:19:54 2023 +0100 @@ -123,6 +123,10 @@ } Ok(()) } + + fn is_new(&self) -> bool { + self.bytes.is_empty() + } } impl std::ops::Index<std::ops::Range<usize>> for IndexData { @@ -146,6 +150,7 @@ } } +#[derive(Debug, PartialEq, Eq)] pub struct RevisionDataParams { pub flags: u16, pub data_offset: u64, @@ -163,6 +168,27 @@ pub _rank: i32, } +impl Default for RevisionDataParams { + fn default() -> Self { + Self { + flags: 0, + data_offset: 0, + data_compressed_length: 0, + data_uncompressed_length: 0, + data_delta_base: -1, + link_rev: -1, + parent_rev_1: -1, + parent_rev_2: -1, + node_id: [0; NODE_BYTES_LENGTH], + _sidedata_offset: 0, + _sidedata_compressed_length: 0, + data_compression_mode: COMPRESSION_MODE_INLINE, + _sidedata_compression_mode: COMPRESSION_MODE_INLINE, + _rank: -1, + } + } +} + #[derive(BytesCast)] #[repr(C)] pub struct RevisionDataV1 { @@ -397,6 +423,29 @@ }) } + pub fn entry_as_params( + &self, + rev: UncheckedRevision, + ) -> Option<RevisionDataParams> { + let rev = self.check_revision(rev)?; + self.get_entry(rev).map(|e| RevisionDataParams { + flags: e.flags(), + data_offset: if rev.0 == 0 && !self.bytes.is_new() { + e.flags() as u64 + } else { + e.raw_offset() + }, + data_compressed_length: e.compressed_len().try_into().unwrap(), + data_uncompressed_length: e.uncompressed_len(), + data_delta_base: e.base_revision_or_base_of_delta_chain().0, + link_rev: e.link_revision().0, + parent_rev_1: e.p1().0, + parent_rev_2: e.p2().0, + node_id: e.hash().as_bytes().try_into().unwrap(), + ..Default::default() + }) + } + fn get_entry_inline( &self, rev: Revision, @@ -519,6 +568,9 @@ BigEndian::read_u64(&bytes[..]) as usize } } + pub fn raw_offset(&self) -> u64 { + BigEndian::read_u64(&self.bytes[0..8]) + } pub fn flags(&self) -> u16 { BigEndian::read_u16(&self.bytes[6..=7])
--- a/rust/hg-cpython/src/revlog.rs Wed Sep 27 11:34:52 2023 +0200 +++ b/rust/hg-cpython/src/revlog.rs Thu Nov 02 11:19:54 2023 +0100 @@ -17,11 +17,10 @@ PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, }; use hg::{ - index::IndexHeader, - index::{RevisionDataParams, COMPRESSION_MODE_INLINE}, + index::{IndexHeader, RevisionDataParams}, nodemap::{Block, NodeMapError, NodeTree}, revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, - BaseRevision, Revision, UncheckedRevision, + BaseRevision, Revision, UncheckedRevision, NULL_REVISION, }; use std::cell::RefCell; @@ -237,11 +236,6 @@ Ok(rust_res) } - /// get an index entry - def get(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "get", args, kw) - } - /// compute phases def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> { self.call_cindex(py, "computephasesmapsets", args, kw) @@ -292,23 +286,32 @@ // Since we call back through the high level Python API, // there's no point making a distinction between index_get // and index_getitem. + // gracinet 2023: this above is no longer true for the pure Rust impl def __len__(&self) -> PyResult<usize> { self.len(py) } def __getitem__(&self, key: PyObject) -> PyResult<PyObject> { + let rust_res = self.inner_getitem(py, key.clone_ref(py))?; + // this conversion seems needless, but that's actually because // `index_getitem` does not handle conversion from PyLong, // which expressions such as [e for e in index] internally use. // Note that we don't seem to have a direct way to call // PySequence_GetItem (does the job), which would possibly be better // for performance - let key = match key.extract::<i32>(py) { + // gracinet 2023: the above comment can be removed when we use + // the pure Rust impl only. Note also that `key` can be a binary + // node id. + let c_key = match key.extract::<BaseRevision>(py) { Ok(rev) => rev.to_py_object(py).into_object(), Err(_) => key, }; - self.cindex(py).borrow().inner().get_item(py, key) + let c_res = self.cindex(py).borrow().inner().get_item(py, c_key)?; + + assert_py_eq(py, "__getitem__", &rust_res, &c_res)?; + Ok(rust_res) } def __contains__(&self, item: PyObject) -> PyResult<bool> { @@ -426,13 +429,49 @@ parent_rev_1: tuple.get_item(py, 5).extract(py)?, parent_rev_2: tuple.get_item(py, 6).extract(py)?, node_id, - _sidedata_offset: 0, - _sidedata_compressed_length: 0, - data_compression_mode: COMPRESSION_MODE_INLINE, - _sidedata_compression_mode: COMPRESSION_MODE_INLINE, - _rank: -1, + ..Default::default() }) } +fn revision_data_params_to_py_tuple( + py: Python, + params: RevisionDataParams, +) -> PyTuple { + PyTuple::new( + py, + &[ + params.data_offset.into_py_object(py).into_object(), + params + .data_compressed_length + .into_py_object(py) + .into_object(), + params + .data_uncompressed_length + .into_py_object(py) + .into_object(), + params.data_delta_base.into_py_object(py).into_object(), + params.link_rev.into_py_object(py).into_object(), + params.parent_rev_1.into_py_object(py).into_object(), + params.parent_rev_2.into_py_object(py).into_object(), + PyBytes::new(py, ¶ms.node_id) + .into_py_object(py) + .into_object(), + params._sidedata_offset.into_py_object(py).into_object(), + params + ._sidedata_compressed_length + .into_py_object(py) + .into_object(), + params + .data_compression_mode + .into_py_object(py) + .into_object(), + params + ._sidedata_compression_mode + .into_py_object(py) + .into_object(), + params._rank.into_py_object(py).into_object(), + ], + ) +} impl MixedIndex { fn new( @@ -601,6 +640,34 @@ Ok(py.None()) } + + fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> { + let idx = self.index(py).borrow(); + Ok(match key.extract::<BaseRevision>(py) { + Ok(key_as_int) => { + let entry_params = if key_as_int == NULL_REVISION.0 { + RevisionDataParams::default() + } else { + let rev = UncheckedRevision(key_as_int); + match idx.entry_as_params(rev) { + Some(e) => e, + None => { + return Err(PyErr::new::<IndexError, _>( + py, + "revlog index out of range", + )); + } + } + }; + revision_data_params_to_py_tuple(py, entry_params) + .into_object() + } + _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else( + || py.None(), + |py_rev| py_rev.into_py_object(py).into_object(), + ), + }) + } } fn revlog_error(py: Python) -> PyErr {