Mercurial > hg-stable

--- a/rust/hg-core/src/revlog/index.rs	Wed Sep 27 11:34:52 2023 +0200
+++ b/rust/hg-core/src/revlog/index.rs	Thu Nov 02 11:19:54 2023 +0100
@@ -123,6 +123,10 @@
         }
         Ok(())
     }
+
+    fn is_new(&self) -> bool {
+        self.bytes.is_empty()
+    }
 }

 impl std::ops::Index<std::ops::Range<usize>> for IndexData {
@@ -146,6 +150,7 @@
     }
 }

+#[derive(Debug, PartialEq, Eq)]
 pub struct RevisionDataParams {
     pub flags: u16,
     pub data_offset: u64,
@@ -163,6 +168,27 @@
     pub _rank: i32,
 }

+impl Default for RevisionDataParams {
+    fn default() -> Self {
+        Self {
+            flags: 0,
+            data_offset: 0,
+            data_compressed_length: 0,
+            data_uncompressed_length: 0,
+            data_delta_base: -1,
+            link_rev: -1,
+            parent_rev_1: -1,
+            parent_rev_2: -1,
+            node_id: [0; NODE_BYTES_LENGTH],
+            _sidedata_offset: 0,
+            _sidedata_compressed_length: 0,
+            data_compression_mode: COMPRESSION_MODE_INLINE,
+            _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
+            _rank: -1,
+        }
+    }
+}
+
 #[derive(BytesCast)]
 #[repr(C)]
 pub struct RevisionDataV1 {
@@ -397,6 +423,29 @@
         })
     }

+    pub fn entry_as_params(
+        &self,
+        rev: UncheckedRevision,
+    ) -> Option<RevisionDataParams> {
+        let rev = self.check_revision(rev)?;
+        self.get_entry(rev).map(|e| RevisionDataParams {
+            flags: e.flags(),
+            data_offset: if rev.0 == 0 && !self.bytes.is_new() {
+                e.flags() as u64
+            } else {
+                e.raw_offset()
+            },
+            data_compressed_length: e.compressed_len().try_into().unwrap(),
+            data_uncompressed_length: e.uncompressed_len(),
+            data_delta_base: e.base_revision_or_base_of_delta_chain().0,
+            link_rev: e.link_revision().0,
+            parent_rev_1: e.p1().0,
+            parent_rev_2: e.p2().0,
+            node_id: e.hash().as_bytes().try_into().unwrap(),
+            ..Default::default()
+        })
+    }
+
     fn get_entry_inline(
         &self,
         rev: Revision,
@@ -519,6 +568,9 @@
             BigEndian::read_u64(&bytes[..]) as usize
         }
     }
+    pub fn raw_offset(&self) -> u64 {
+        BigEndian::read_u64(&self.bytes[0..8])
+    }

     pub fn flags(&self) -> u16 {
         BigEndian::read_u16(&self.bytes[6..=7])
--- a/rust/hg-cpython/src/revlog.rs	Wed Sep 27 11:34:52 2023 +0200
+++ b/rust/hg-cpython/src/revlog.rs	Thu Nov 02 11:19:54 2023 +0100
@@ -17,11 +17,10 @@
     PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject,
 };
 use hg::{
-    index::IndexHeader,
-    index::{RevisionDataParams, COMPRESSION_MODE_INLINE},
+    index::{IndexHeader, RevisionDataParams},
     nodemap::{Block, NodeMapError, NodeTree},
     revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex},
-    BaseRevision, Revision, UncheckedRevision,
+    BaseRevision, Revision, UncheckedRevision, NULL_REVISION,
 };
 use std::cell::RefCell;

@@ -237,11 +236,6 @@
         Ok(rust_res)
     }

-    /// get an index entry
-    def get(&self, *args, **kw) -> PyResult<PyObject> {
-        self.call_cindex(py, "get", args, kw)
-    }
-
     /// compute phases
     def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> {
         self.call_cindex(py, "computephasesmapsets", args, kw)
@@ -292,23 +286,32 @@
     // Since we call back through the high level Python API,
     // there's no point making a distinction between index_get
     // and index_getitem.
+    // gracinet 2023: this above is no longer true for the pure Rust impl

     def __len__(&self) -> PyResult<usize> {
         self.len(py)
     }

     def __getitem__(&self, key: PyObject) -> PyResult<PyObject> {
+        let rust_res = self.inner_getitem(py, key.clone_ref(py))?;
+
         // this conversion seems needless, but that's actually because
         // `index_getitem` does not handle conversion from PyLong,
         // which expressions such as [e for e in index] internally use.
         // Note that we don't seem to have a direct way to call
         // PySequence_GetItem (does the job), which would possibly be better
         // for performance
-        let key = match key.extract::<i32>(py) {
+        // gracinet 2023: the above comment can be removed when we use
+        // the pure Rust impl only. Note also that `key` can be a binary
+        // node id.
+        let c_key = match key.extract::<BaseRevision>(py) {
             Ok(rev) => rev.to_py_object(py).into_object(),
             Err(_) => key,
         };
-        self.cindex(py).borrow().inner().get_item(py, key)
+        let c_res = self.cindex(py).borrow().inner().get_item(py, c_key)?;
+
+        assert_py_eq(py, "__getitem__", &rust_res, &c_res)?;
+        Ok(rust_res)
     }

     def __contains__(&self, item: PyObject) -> PyResult<bool> {
@@ -426,13 +429,49 @@
         parent_rev_1: tuple.get_item(py, 5).extract(py)?,
         parent_rev_2: tuple.get_item(py, 6).extract(py)?,
         node_id,
-        _sidedata_offset: 0,
-        _sidedata_compressed_length: 0,
-        data_compression_mode: COMPRESSION_MODE_INLINE,
-        _sidedata_compression_mode: COMPRESSION_MODE_INLINE,
-        _rank: -1,
+        ..Default::default()
     })
 }
+fn revision_data_params_to_py_tuple(
+    py: Python,
+    params: RevisionDataParams,
+) -> PyTuple {
+    PyTuple::new(
+        py,
+        &[
+            params.data_offset.into_py_object(py).into_object(),
+            params
+                .data_compressed_length
+                .into_py_object(py)
+                .into_object(),
+            params
+                .data_uncompressed_length
+                .into_py_object(py)
+                .into_object(),
+            params.data_delta_base.into_py_object(py).into_object(),
+            params.link_rev.into_py_object(py).into_object(),
+            params.parent_rev_1.into_py_object(py).into_object(),
+            params.parent_rev_2.into_py_object(py).into_object(),
+            PyBytes::new(py, &params.node_id)
+                .into_py_object(py)
+                .into_object(),
+            params._sidedata_offset.into_py_object(py).into_object(),
+            params
+                ._sidedata_compressed_length
+                .into_py_object(py)
+                .into_object(),
+            params
+                .data_compression_mode
+                .into_py_object(py)
+                .into_object(),
+            params
+                ._sidedata_compression_mode
+                .into_py_object(py)
+                .into_object(),
+            params._rank.into_py_object(py).into_object(),
+        ],
+    )
+}

 impl MixedIndex {
     fn new(
@@ -601,6 +640,34 @@

         Ok(py.None())
     }
+
+    fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> {
+        let idx = self.index(py).borrow();
+        Ok(match key.extract::<BaseRevision>(py) {
+            Ok(key_as_int) => {
+                let entry_params = if key_as_int == NULL_REVISION.0 {
+                    RevisionDataParams::default()
+                } else {
+                    let rev = UncheckedRevision(key_as_int);
+                    match idx.entry_as_params(rev) {
+                        Some(e) => e,
+                        None => {
+                            return Err(PyErr::new::<IndexError, _>(
+                                py,
+                                "revlog index out of range",
+                            ));
+                        }
+                    }
+                };
+                revision_data_params_to_py_tuple(py, entry_params)
+                    .into_object()
+            }
+            _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else(
+                || py.None(),
+                |py_rev| py_rev.into_py_object(py).into_object(),
+            ),
+        })
+    }
 }

 fn revlog_error(py: Python) -> PyErr {