changeset 51406:f8bf1a8e9181

phases: keep internal state as rev-num instead of node-id Node-id are expensive to work with, dealing with revision is much simple and faster. The fact we still used node-id here shows how few effort have been put into making the phase logic fast. We tend to no longer use node-id internally for about ten years. This has a large impact of repository with many draft roots. For example this Mozilla-try copy have ½ Million draft roots and `perf::unbundle` see a significant improvement. ### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog # benchmark.name = hg.perf.perf-unbundle # bin-env-vars.hg.flavor = no-rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.issue6528 = disabled # benchmark.variants.revs = last-1 before:: 1.746791 seconds after:: 1.278379 seconds (-26.82%) # benchmark.variants.revs = last-10 before:: 3.145774 seconds after:: 2.103735 seconds (-33.13%) # benchmark.variants.revs = last-100 before:: 3.487635 seconds after:: 2.446749 seconds (-29.85%) # benchmark.variants.revs = last-1000 before:: 5.007568 seconds after:: 3.989923 seconds (-20.32%)
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 20 Feb 2024 21:40:13 +0100
parents 12881244e48a
children 71ae6fee2b9d
files hgext/mq.py mercurial/cext/parsers.pyi mercurial/cext/revlog.c mercurial/phases.py mercurial/repoview.py rust/hg-cpython/src/revlog.rs tests/test-debugcommands.t tests/test-parseindex.t
diffstat 8 files changed, 48 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/mq.py	Tue Feb 20 21:40:08 2024 +0100
+++ b/hgext/mq.py	Tue Feb 20 21:40:13 2024 +0100
@@ -4073,7 +4073,7 @@
         else:
             mqphase = phases.draft
         qbase = repo[repo.mq.applied[0].node]
-        roots[mqphase].add(qbase.node())
+        roots[mqphase].add(qbase.rev())
     return roots
 
 
--- a/mercurial/cext/parsers.pyi	Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/cext/parsers.pyi	Tue Feb 20 21:40:13 2024 +0100
@@ -58,7 +58,7 @@
     def get_rev(self, value: bytes) -> Optional[int]: ...
     def has_node(self, value: Union[int, bytes]) -> bool: ...
     def rev(self, node: bytes) -> int: ...
-    def computephasesmapsets(self, root: Dict[int, Set[bytes]]) -> Tuple[int, Dict[int, Set[bytes]]]: ...
+    def computephasesmapsets(self, root: Dict[int, Set[int]]) -> Tuple[int, Dict[int, Set[bytes]]]: ...
     def reachableroots2(self, minroot: int, heads: List[int], roots: List[int], includepath: bool) -> List[int]: ...
     def headrevs(self, filteredrevs: Optional[List[int]]) -> List[int]: ...
     def headrevsfiltered(self, filteredrevs: Optional[List[int]]) -> List[int]: ...
--- a/mercurial/cext/revlog.c	Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/cext/revlog.c	Tue Feb 20 21:40:13 2024 +0100
@@ -1081,7 +1081,6 @@
 	PyObject *item;
 	PyObject *iterator;
 	int rev, minrev = -1;
-	char *node;
 
 	if (!PySet_Check(roots)) {
 		PyErr_SetString(PyExc_TypeError,
@@ -1092,9 +1091,10 @@
 	if (iterator == NULL)
 		return -2;
 	while ((item = PyIter_Next(iterator))) {
-		if (node_check(self->nodelen, item, &node) == -1)
+		rev = (int)PyLong_AsLong(item);
+		if (rev == -1 && PyErr_Occurred()) {
 			goto failed;
-		rev = index_find_node(self, node);
+		}
 		/* null is implicitly public, so negative is invalid */
 		if (rev < 0 || rev >= len)
 			goto failed;
--- a/mercurial/phases.py	Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/phases.py	Tue Feb 20 21:40:13 2024 +0100
@@ -133,7 +133,7 @@
     util,
 )
 
-Phaseroots = Dict[int, Set[bytes]]
+Phaseroots = Dict[int, Set[int]]
 
 if typing.TYPE_CHECKING:
     from . import (
@@ -210,7 +210,7 @@
     repo = repo.unfiltered()
     dirty = False
     roots = {i: set() for i in allphases}
-    has_node = repo.changelog.index.has_node
+    to_rev = repo.changelog.index.get_rev
     unknown_msg = b'removing unknown node %s from %i-phase boundary\n'
     try:
         f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
@@ -219,11 +219,12 @@
                 str_phase, hex_node = line.split()
                 phase = int(str_phase)
                 node = bin(hex_node)
-                if not has_node(node):
+                rev = to_rev(node)
+                if rev is None:
                     repo.ui.debug(unknown_msg % (short(hex_node), phase))
                     dirty = True
                 else:
-                    roots[phase].add(node)
+                    roots[phase].add(rev)
         finally:
             f.close()
     except FileNotFoundError:
@@ -391,7 +392,7 @@
 
     def nonpublicphaseroots(
         self, repo: "localrepo.localrepository"
-    ) -> Set[bytes]:
+    ) -> Set[int]:
         """returns the roots of all non-public phases
 
         The roots are not minimized, so if the secret revisions are
@@ -499,7 +500,7 @@
         self._phasesets = {phase: set() for phase in allphases}
         lowerroots = set()
         for phase in reversed(trackedphases):
-            roots = pycompat.maplist(cl.rev, self._phaseroots[phase])
+            roots = self._phaseroots[phase]
             if roots:
                 ps = set(cl.descendants(roots))
                 for root in roots:
@@ -551,8 +552,10 @@
 
     def _write(self, repo, fp):
         assert repo.filtername is None
+        to_node = repo.changelog.node
         for phase, roots in self._phaseroots.items():
-            for h in sorted(roots):
+            for r in sorted(roots):
+                h = to_node(r)
                 fp.write(b'%i %s\n' % (phase, hex(h)))
         self.dirty = False
 
@@ -584,7 +587,7 @@
         repo.invalidatevolatilesets()
 
     def advanceboundary(
-        self, repo, tr, targetphase, nodes, revs=None, dryrun=None
+        self, repo, tr, targetphase, nodes=None, revs=None, dryrun=None
     ):
         """Set all 'nodes' to phase 'targetphase'
 
@@ -598,6 +601,8 @@
         # phaseroots values, replace them.
         if revs is None:
             revs = []
+        if not revs and not nodes:
+            return set()
         if tr is None:
             phasetracking = None
         else:
@@ -616,7 +621,7 @@
 
             olds = self._phaseroots[phase]
 
-            affected = repo.revs(b'%ln::%ld', olds, revs)
+            affected = repo.revs(b'%ld::%ld', olds, revs)
             changes.update(affected)
             if dryrun:
                 continue
@@ -625,10 +630,7 @@
                     phasetracking, r, self.phase(repo, r), targetphase
                 )
 
-            roots = {
-                ctx.node()
-                for ctx in repo.set(b'roots((%ln::) - %ld)', olds, affected)
-            }
+            roots = set(repo.revs(b'roots((%ld::) - %ld)', olds, affected))
             if olds != roots:
                 self._updateroots(repo, phase, roots, tr)
                 # some roots may need to be declared for lower phases
@@ -636,7 +638,7 @@
         if not dryrun:
             # declare deleted root in the target phase
             if targetphase != 0:
-                self._retractboundary(repo, tr, targetphase, delroots)
+                self._retractboundary(repo, tr, targetphase, revs=delroots)
             repo.invalidatevolatilesets()
         return changes
 
@@ -651,21 +653,19 @@
         else:
             phasetracking = tr.changes.get(b'phases')
         repo = repo.unfiltered()
-        if (
-            self._retractboundary(repo, tr, targetphase, nodes)
-            and phasetracking is not None
-        ):
+        retracted = self._retractboundary(repo, tr, targetphase, nodes)
+        if retracted and phasetracking is not None:
 
             # find the affected revisions
             new = self._phaseroots[targetphase]
             old = oldroots[targetphase]
-            affected = set(repo.revs(b'(%ln::) - (%ln::)', new, old))
+            affected = set(repo.revs(b'(%ld::) - (%ld::)', new, old))
 
             # find the phase of the affected revision
             for phase in range(targetphase, -1, -1):
                 if phase:
                     roots = oldroots.get(phase, [])
-                    revs = set(repo.revs(b'%ln::%ld', roots, affected))
+                    revs = set(repo.revs(b'%ld::%ld', roots, affected))
                     affected -= revs
                 else:  # public phase
                     revs = affected
@@ -673,11 +673,15 @@
                     _trackphasechange(phasetracking, r, phase, targetphase)
         repo.invalidatevolatilesets()
 
-    def _retractboundary(self, repo, tr, targetphase, nodes, revs=None):
+    def _retractboundary(self, repo, tr, targetphase, nodes=None, revs=None):
         # Be careful to preserve shallow-copied values: do not update
         # phaseroots values, replace them.
         if revs is None:
             revs = []
+        if nodes is None:
+            nodes = []
+        if not revs and not nodes:
+            return False
         if (
             targetphase == internal
             and not supportinternal(repo)
@@ -688,10 +692,8 @@
             msg = b'this repository does not support the %s phase' % name
             raise error.ProgrammingError(msg)
 
-        repo = repo.unfiltered()
-        torev = repo.changelog.rev
-        tonode = repo.changelog.node
-        currentroots = {torev(node) for node in self._phaseroots[targetphase]}
+        torev = repo.changelog.index.rev
+        currentroots = self._phaseroots[targetphase]
         finalroots = oldroots = set(currentroots)
         newroots = [torev(node) for node in nodes] + [r for r in revs]
         newroots = [
@@ -701,6 +703,8 @@
         if newroots:
             if nullrev in newroots:
                 raise error.Abort(_(b'cannot change null revision phase'))
+            # do not break the CoW assumption of the shallow copy
+            currentroots = currentroots.copy()
             currentroots.update(newroots)
 
             # Only compute new roots for revs above the roots that are being
@@ -712,18 +716,13 @@
             finalroots = {rev for rev in currentroots if rev < minnewroot}
             finalroots.update(updatedroots)
         if finalroots != oldroots:
-            self._updateroots(
-                repo,
-                targetphase,
-                {tonode(rev) for rev in finalroots},
-                tr,
-            )
+            self._updateroots(repo, targetphase, finalroots, tr)
             return True
         return False
 
     def register_strip(
         self,
-        repo: "localrepo.localrepository",
+        repo,
         tr,
         strip_rev: int,
     ):
@@ -731,12 +730,10 @@
 
         Any roots higher than the stripped revision should be dropped.
         """
-        assert repo.filtername is None
-        to_rev = repo.changelog.index.rev
-        for targetphase, nodes in list(self._phaseroots.items()):
-            filtered = {n for n in nodes if to_rev(n) >= strip_rev}
+        for targetphase, roots in list(self._phaseroots.items()):
+            filtered = {r for r in roots if r >= strip_rev}
             if filtered:
-                self._updateroots(repo, targetphase, nodes - filtered, tr)
+                self._updateroots(repo, targetphase, roots - filtered, tr)
         self.invalidate()
 
 
@@ -793,9 +790,10 @@
     keys = util.sortdict()
     value = b'%i' % draft
     cl = repo.unfiltered().changelog
+    to_node = cl.node
     for root in repo._phasecache._phaseroots[draft]:
-        if repo._phasecache.phase(repo, cl.rev(root)) <= draft:
-            keys[hex(root)] = value
+        if repo._phasecache.phase(repo, root) <= draft:
+            keys[hex(to_node(root))] = value
 
     if repo.publishing():
         # Add an extra data to let remote know we are a publishing
--- a/mercurial/repoview.py	Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/repoview.py	Tue Feb 20 21:40:13 2024 +0100
@@ -160,7 +160,7 @@
     firstmutable = len(cl)
     roots = repo._phasecache.nonpublicphaseroots(repo)
     if roots:
-        firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
+        firstmutable = min(firstmutable, min(roots))
     # protect from nullrev root
     firstmutable = max(0, firstmutable)
     return frozenset(range(firstmutable, len(cl)))
--- a/rust/hg-cpython/src/revlog.rs	Tue Feb 20 21:40:08 2024 +0100
+++ b/rust/hg-cpython/src/revlog.rs	Tue Feb 20 21:40:13 2024 +0100
@@ -970,31 +970,16 @@
         py_roots: PyDict,
     ) -> PyResult<PyObject> {
         let index = &*self.index(py).borrow();
-        let opt = self.get_nodetree(py)?.borrow();
-        let nt = opt.as_ref().unwrap();
         let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
             .items_list(py)
             .iter(py)
             .map(|r| {
                 let phase = r.get_item(py, 0)?;
-                let nodes = r.get_item(py, 1)?;
-                // Transform the nodes from Python to revs here since we
-                // have access to the nodemap
-                let revs: Result<_, _> = nodes
-                    .iter(py)?
-                    .map(|node| match node?.extract::<PyBytes>(py) {
-                        Ok(py_bytes) => {
-                            let node = node_from_py_bytes(py, &py_bytes)?;
-                            nt.find_bin(index, node.into())
-                                .map_err(|e| nodemap_error(py, e))?
-                                .ok_or_else(|| revlog_error(py))
-                        }
-                        Err(e) => Err(e),
-                    })
-                    .collect();
+                let revs: Vec<_> =
+                    rev_pyiter_collect(py, &r.get_item(py, 1)?, index)?;
                 let phase = Phase::try_from(phase.extract::<usize>(py)?)
                     .map_err(|_| revlog_error(py));
-                Ok((phase?, revs?))
+                Ok((phase?, revs))
             })
             .collect();
         let (len, phase_maps) = index
--- a/tests/test-debugcommands.t	Tue Feb 20 21:40:08 2024 +0100
+++ b/tests/test-debugcommands.t	Tue Feb 20 21:40:13 2024 +0100
@@ -197,7 +197,7 @@
   node trie depth: 1
   node trie last rev scanned: -1 (no-rust !)
   node trie last rev scanned: 3 (rust !)
-  node trie lookups: 4 (no-rust !)
+  node trie lookups: 3 (no-rust !)
   node trie lookups: 2 (rust !)
   node trie misses: 1
   node trie splits: 1
--- a/tests/test-parseindex.t	Tue Feb 20 21:40:08 2024 +0100
+++ b/tests/test-parseindex.t	Tue Feb 20 21:40:13 2024 +0100
@@ -187,7 +187,7 @@
   > ops = [
   >     ('reachableroots',
   >      lambda: cl.index.reachableroots2(0, [1], [0], False)),
-  >     ('compute_phases_map_sets', lambda: cl.computephases({1: {cl.node(0)}})),
+  >     ('compute_phases_map_sets', lambda: cl.computephases({1: {0}})),
   >     ('index_headrevs', lambda: cl.headrevs()),
   >     ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
   >     ('find_deepest', lambda: cl.ancestor(n0, n1)),