changeset 49120:3df46f3a3d6c

rust-dirstatemap: implement part of the `setparents` logic The Python code does many round-trip calls to the Rust dirstatemap when copy information needs to be dropped in `setparents`. This may result in improved performance on `commit`, `update` and other such commands, but was mostly done to drop the last use of `set_dirstate_item`. See inline comments for an asterisk about performance, and see next patch for why `set_dirstate_item` has to go. Differential Revision: https://phab.mercurial-scm.org/D12518
author Raphaël Gomès <rgomes@octobus.net>
date Tue, 29 Mar 2022 00:53:11 +0200
parents 4c75f00b199e
children 10edc54d18f1
files mercurial/dirstatemap.py rust/hg-core/src/dirstate_tree/dirstate_map.rs rust/hg-cpython/src/dirstate/dirstate_map.rs
diffstat 3 files changed, 49 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/dirstatemap.py	Mon Mar 28 23:45:54 2022 +0200
+++ b/mercurial/dirstatemap.py	Tue Mar 29 00:53:11 2022 +0200
@@ -593,22 +593,7 @@
             self._dirtyparents = True
             copies = {}
             if fold_p2:
-                # Collect into an intermediate list to avoid a `RuntimeError`
-                # exception due to mutation during iteration.
-                # TODO: move this the whole loop to Rust where `iter_mut`
-                # enables in-place mutation of elements of a collection while
-                # iterating it, without mutating the collection itself.
-                files_with_p2_info = [
-                    f for f, s in self._map.items() if s.p2_info
-                ]
-                rust_map = self._map
-                for f in files_with_p2_info:
-                    e = rust_map.get(f)
-                    source = self.copymap.pop(f, None)
-                    if source:
-                        copies[f] = source
-                    e.drop_merge_data()
-                    rust_map.set_dirstate_item(f, e)
+                copies = self._map.setparents_fixup()
             return copies
 
         ### disk interaction
--- a/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Mon Mar 28 23:45:54 2022 +0200
+++ b/rust/hg-core/src/dirstate_tree/dirstate_map.rs	Tue Mar 29 00:53:11 2022 +0200
@@ -1375,6 +1375,41 @@
         )))
     }
 
+    /// Only public because it needs to be exposed to the Python layer.
+    /// It is not the full `setparents` logic, only the parts that mutate the
+    /// entries.
+    pub fn setparents_fixup(
+        &mut self,
+    ) -> Result<Vec<(HgPathBuf, HgPathBuf)>, DirstateV2ParseError> {
+        // XXX
+        // All the copying and re-querying is quite inefficient, but this is
+        // still a lot better than doing it from Python.
+        //
+        // The better solution is to develop a mechanism for `iter_mut`,
+        // which will be a lot more involved: we're dealing with a lazy,
+        // append-mostly, tree-like data structure. This will do for now.
+        let mut copies = vec![];
+        let mut files_with_p2_info = vec![];
+        for res in self.iter() {
+            let (path, entry) = res?;
+            if entry.p2_info() {
+                files_with_p2_info.push(path.to_owned())
+            }
+        }
+        self.with_dmap_mut(|map| {
+            for path in files_with_p2_info.iter() {
+                let node = map.get_or_insert(path)?;
+                let entry =
+                    node.data.as_entry_mut().expect("entry should exist");
+                entry.drop_merge_data();
+                if let Some(source) = node.copy_source.take().as_deref() {
+                    copies.push((path.to_owned(), source.to_owned()));
+                }
+            }
+            Ok(copies)
+        })
+    }
+
     pub fn debug_iter(
         &self,
         all: bool,
--- a/rust/hg-cpython/src/dirstate/dirstate_map.rs	Mon Mar 28 23:45:54 2022 +0200
+++ b/rust/hg-cpython/src/dirstate/dirstate_map.rs	Tue Mar 29 00:53:11 2022 +0200
@@ -489,6 +489,19 @@
         Ok(dirs)
     }
 
+    def setparents_fixup(&self) -> PyResult<PyDict> {
+        let dict = PyDict::new(py);
+        let copies = self.inner(py).borrow_mut().setparents_fixup();
+        for (key, value) in copies.map_err(|e| v2_error(py, e))? {
+            dict.set_item(
+                py,
+                PyBytes::new(py, key.as_bytes()),
+                PyBytes::new(py, value.as_bytes()),
+            )?;
+        }
+        Ok(dict)
+    }
+
     def debug_iter(&self, all: bool) -> PyResult<PyList> {
         let dirs = PyList::new(py, &[]);
         for item in self.inner(py).borrow().debug_iter(all) {