# HG changeset patch # User Raphaël Gomès # Date 1698920394 -3600 # Node ID 002b49905aac371c6e770c0289f6d088357c8c10 # Parent 297fa956b6c4e328e8c66e3f14226380ca157efd rust-index: implementation of __getitem__ Although the removed panic tends to prove if the full test suite did pass that the case when the input is a node id does not happen, it is best not to remove it right now. Raising IndexError is crucial for iteration on the index to stop, given the default CPython sequence iterator, see for instance https://github.com/zpoint/CPython-Internals/blobs/master/BasicObject/iter/iter.md This was spotted by `test-rust-ancestors.py`, which does simple interations on indexes (as preflight checks). In `revlog.c`, `index_getitem` defaults to `index_get` when called on revision numbers, which does raise `IndexError` with the same message as the one we are introducing here. diff -r 297fa956b6c4 -r 002b49905aac rust/hg-core/src/revlog/index.rs --- a/rust/hg-core/src/revlog/index.rs Wed Sep 27 11:34:52 2023 +0200 +++ b/rust/hg-core/src/revlog/index.rs Thu Nov 02 11:19:54 2023 +0100 @@ -123,6 +123,10 @@ } Ok(()) } + + fn is_new(&self) -> bool { + self.bytes.is_empty() + } } impl std::ops::Index> for IndexData { @@ -146,6 +150,7 @@ } } +#[derive(Debug, PartialEq, Eq)] pub struct RevisionDataParams { pub flags: u16, pub data_offset: u64, @@ -163,6 +168,27 @@ pub _rank: i32, } +impl Default for RevisionDataParams { + fn default() -> Self { + Self { + flags: 0, + data_offset: 0, + data_compressed_length: 0, + data_uncompressed_length: 0, + data_delta_base: -1, + link_rev: -1, + parent_rev_1: -1, + parent_rev_2: -1, + node_id: [0; NODE_BYTES_LENGTH], + _sidedata_offset: 0, + _sidedata_compressed_length: 0, + data_compression_mode: COMPRESSION_MODE_INLINE, + _sidedata_compression_mode: COMPRESSION_MODE_INLINE, + _rank: -1, + } + } +} + #[derive(BytesCast)] #[repr(C)] pub struct RevisionDataV1 { @@ -397,6 +423,29 @@ }) } + pub fn entry_as_params( + &self, + rev: UncheckedRevision, + ) -> Option { + let rev = self.check_revision(rev)?; + self.get_entry(rev).map(|e| RevisionDataParams { + flags: e.flags(), + data_offset: if rev.0 == 0 && !self.bytes.is_new() { + e.flags() as u64 + } else { + e.raw_offset() + }, + data_compressed_length: e.compressed_len().try_into().unwrap(), + data_uncompressed_length: e.uncompressed_len(), + data_delta_base: e.base_revision_or_base_of_delta_chain().0, + link_rev: e.link_revision().0, + parent_rev_1: e.p1().0, + parent_rev_2: e.p2().0, + node_id: e.hash().as_bytes().try_into().unwrap(), + ..Default::default() + }) + } + fn get_entry_inline( &self, rev: Revision, @@ -519,6 +568,9 @@ BigEndian::read_u64(&bytes[..]) as usize } } + pub fn raw_offset(&self) -> u64 { + BigEndian::read_u64(&self.bytes[0..8]) + } pub fn flags(&self) -> u16 { BigEndian::read_u16(&self.bytes[6..=7]) diff -r 297fa956b6c4 -r 002b49905aac rust/hg-cpython/src/revlog.rs --- a/rust/hg-cpython/src/revlog.rs Wed Sep 27 11:34:52 2023 +0200 +++ b/rust/hg-cpython/src/revlog.rs Thu Nov 02 11:19:54 2023 +0100 @@ -17,11 +17,10 @@ PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, }; use hg::{ - index::IndexHeader, - index::{RevisionDataParams, COMPRESSION_MODE_INLINE}, + index::{IndexHeader, RevisionDataParams}, nodemap::{Block, NodeMapError, NodeTree}, revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, - BaseRevision, Revision, UncheckedRevision, + BaseRevision, Revision, UncheckedRevision, NULL_REVISION, }; use std::cell::RefCell; @@ -237,11 +236,6 @@ Ok(rust_res) } - /// get an index entry - def get(&self, *args, **kw) -> PyResult { - self.call_cindex(py, "get", args, kw) - } - /// compute phases def computephasesmapsets(&self, *args, **kw) -> PyResult { self.call_cindex(py, "computephasesmapsets", args, kw) @@ -292,23 +286,32 @@ // Since we call back through the high level Python API, // there's no point making a distinction between index_get // and index_getitem. + // gracinet 2023: this above is no longer true for the pure Rust impl def __len__(&self) -> PyResult { self.len(py) } def __getitem__(&self, key: PyObject) -> PyResult { + let rust_res = self.inner_getitem(py, key.clone_ref(py))?; + // this conversion seems needless, but that's actually because // `index_getitem` does not handle conversion from PyLong, // which expressions such as [e for e in index] internally use. // Note that we don't seem to have a direct way to call // PySequence_GetItem (does the job), which would possibly be better // for performance - let key = match key.extract::(py) { + // gracinet 2023: the above comment can be removed when we use + // the pure Rust impl only. Note also that `key` can be a binary + // node id. + let c_key = match key.extract::(py) { Ok(rev) => rev.to_py_object(py).into_object(), Err(_) => key, }; - self.cindex(py).borrow().inner().get_item(py, key) + let c_res = self.cindex(py).borrow().inner().get_item(py, c_key)?; + + assert_py_eq(py, "__getitem__", &rust_res, &c_res)?; + Ok(rust_res) } def __contains__(&self, item: PyObject) -> PyResult { @@ -426,13 +429,49 @@ parent_rev_1: tuple.get_item(py, 5).extract(py)?, parent_rev_2: tuple.get_item(py, 6).extract(py)?, node_id, - _sidedata_offset: 0, - _sidedata_compressed_length: 0, - data_compression_mode: COMPRESSION_MODE_INLINE, - _sidedata_compression_mode: COMPRESSION_MODE_INLINE, - _rank: -1, + ..Default::default() }) } +fn revision_data_params_to_py_tuple( + py: Python, + params: RevisionDataParams, +) -> PyTuple { + PyTuple::new( + py, + &[ + params.data_offset.into_py_object(py).into_object(), + params + .data_compressed_length + .into_py_object(py) + .into_object(), + params + .data_uncompressed_length + .into_py_object(py) + .into_object(), + params.data_delta_base.into_py_object(py).into_object(), + params.link_rev.into_py_object(py).into_object(), + params.parent_rev_1.into_py_object(py).into_object(), + params.parent_rev_2.into_py_object(py).into_object(), + PyBytes::new(py, ¶ms.node_id) + .into_py_object(py) + .into_object(), + params._sidedata_offset.into_py_object(py).into_object(), + params + ._sidedata_compressed_length + .into_py_object(py) + .into_object(), + params + .data_compression_mode + .into_py_object(py) + .into_object(), + params + ._sidedata_compression_mode + .into_py_object(py) + .into_object(), + params._rank.into_py_object(py).into_object(), + ], + ) +} impl MixedIndex { fn new( @@ -601,6 +640,34 @@ Ok(py.None()) } + + fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult { + let idx = self.index(py).borrow(); + Ok(match key.extract::(py) { + Ok(key_as_int) => { + let entry_params = if key_as_int == NULL_REVISION.0 { + RevisionDataParams::default() + } else { + let rev = UncheckedRevision(key_as_int); + match idx.entry_as_params(rev) { + Some(e) => e, + None => { + return Err(PyErr::new::( + py, + "revlog index out of range", + )); + } + } + }; + revision_data_params_to_py_tuple(py, entry_params) + .into_object() + } + _ => self.get_rev(py, key.extract::(py)?)?.map_or_else( + || py.None(), + |py_rev| py_rev.into_py_object(py).into_object(), + ), + }) + } } fn revlog_error(py: Python) -> PyErr {