phases: keep internal state as rev-num instead of node-id
Node-id are expensive to work with, dealing with revision is much simple and
faster.
The fact we still used node-id here shows how few effort have been put into
making the phase logic fast. We tend to no longer use node-id internally for
about ten years.
This has a large impact of repository with many draft roots. For example this
Mozilla-try copy have ½ Million draft roots and `perf::unbundle` see a
significant improvement.
### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog
# benchmark.name = hg.perf.perf-unbundle
# bin-env-vars.hg.flavor = no-rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.
issue6528 = disabled
# benchmark.variants.revs = last-1
before:: 1.746791 seconds
after:: 1.278379 seconds (-26.82%)
# benchmark.variants.revs = last-10
before:: 3.145774 seconds
after:: 2.103735 seconds (-33.13%)
# benchmark.variants.revs = last-100
before:: 3.487635 seconds
after:: 2.446749 seconds (-29.85%)
# benchmark.variants.revs = last-1000
before:: 5.007568 seconds
after:: 3.989923 seconds (-20.32%)
--- a/hgext/mq.py Tue Feb 20 21:40:08 2024 +0100
+++ b/hgext/mq.py Tue Feb 20 21:40:13 2024 +0100
@@ -4073,7 +4073,7 @@
else:
mqphase = phases.draft
qbase = repo[repo.mq.applied[0].node]
- roots[mqphase].add(qbase.node())
+ roots[mqphase].add(qbase.rev())
return roots
--- a/mercurial/cext/parsers.pyi Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/cext/parsers.pyi Tue Feb 20 21:40:13 2024 +0100
@@ -58,7 +58,7 @@
def get_rev(self, value: bytes) -> Optional[int]: ...
def has_node(self, value: Union[int, bytes]) -> bool: ...
def rev(self, node: bytes) -> int: ...
- def computephasesmapsets(self, root: Dict[int, Set[bytes]]) -> Tuple[int, Dict[int, Set[bytes]]]: ...
+ def computephasesmapsets(self, root: Dict[int, Set[int]]) -> Tuple[int, Dict[int, Set[bytes]]]: ...
def reachableroots2(self, minroot: int, heads: List[int], roots: List[int], includepath: bool) -> List[int]: ...
def headrevs(self, filteredrevs: Optional[List[int]]) -> List[int]: ...
def headrevsfiltered(self, filteredrevs: Optional[List[int]]) -> List[int]: ...
--- a/mercurial/cext/revlog.c Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/cext/revlog.c Tue Feb 20 21:40:13 2024 +0100
@@ -1081,7 +1081,6 @@
PyObject *item;
PyObject *iterator;
int rev, minrev = -1;
- char *node;
if (!PySet_Check(roots)) {
PyErr_SetString(PyExc_TypeError,
@@ -1092,9 +1091,10 @@
if (iterator == NULL)
return -2;
while ((item = PyIter_Next(iterator))) {
- if (node_check(self->nodelen, item, &node) == -1)
+ rev = (int)PyLong_AsLong(item);
+ if (rev == -1 && PyErr_Occurred()) {
goto failed;
- rev = index_find_node(self, node);
+ }
/* null is implicitly public, so negative is invalid */
if (rev < 0 || rev >= len)
goto failed;
--- a/mercurial/phases.py Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/phases.py Tue Feb 20 21:40:13 2024 +0100
@@ -133,7 +133,7 @@
util,
)
-Phaseroots = Dict[int, Set[bytes]]
+Phaseroots = Dict[int, Set[int]]
if typing.TYPE_CHECKING:
from . import (
@@ -210,7 +210,7 @@
repo = repo.unfiltered()
dirty = False
roots = {i: set() for i in allphases}
- has_node = repo.changelog.index.has_node
+ to_rev = repo.changelog.index.get_rev
unknown_msg = b'removing unknown node %s from %i-phase boundary\n'
try:
f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots')
@@ -219,11 +219,12 @@
str_phase, hex_node = line.split()
phase = int(str_phase)
node = bin(hex_node)
- if not has_node(node):
+ rev = to_rev(node)
+ if rev is None:
repo.ui.debug(unknown_msg % (short(hex_node), phase))
dirty = True
else:
- roots[phase].add(node)
+ roots[phase].add(rev)
finally:
f.close()
except FileNotFoundError:
@@ -391,7 +392,7 @@
def nonpublicphaseroots(
self, repo: "localrepo.localrepository"
- ) -> Set[bytes]:
+ ) -> Set[int]:
"""returns the roots of all non-public phases
The roots are not minimized, so if the secret revisions are
@@ -499,7 +500,7 @@
self._phasesets = {phase: set() for phase in allphases}
lowerroots = set()
for phase in reversed(trackedphases):
- roots = pycompat.maplist(cl.rev, self._phaseroots[phase])
+ roots = self._phaseroots[phase]
if roots:
ps = set(cl.descendants(roots))
for root in roots:
@@ -551,8 +552,10 @@
def _write(self, repo, fp):
assert repo.filtername is None
+ to_node = repo.changelog.node
for phase, roots in self._phaseroots.items():
- for h in sorted(roots):
+ for r in sorted(roots):
+ h = to_node(r)
fp.write(b'%i %s\n' % (phase, hex(h)))
self.dirty = False
@@ -584,7 +587,7 @@
repo.invalidatevolatilesets()
def advanceboundary(
- self, repo, tr, targetphase, nodes, revs=None, dryrun=None
+ self, repo, tr, targetphase, nodes=None, revs=None, dryrun=None
):
"""Set all 'nodes' to phase 'targetphase'
@@ -598,6 +601,8 @@
# phaseroots values, replace them.
if revs is None:
revs = []
+ if not revs and not nodes:
+ return set()
if tr is None:
phasetracking = None
else:
@@ -616,7 +621,7 @@
olds = self._phaseroots[phase]
- affected = repo.revs(b'%ln::%ld', olds, revs)
+ affected = repo.revs(b'%ld::%ld', olds, revs)
changes.update(affected)
if dryrun:
continue
@@ -625,10 +630,7 @@
phasetracking, r, self.phase(repo, r), targetphase
)
- roots = {
- ctx.node()
- for ctx in repo.set(b'roots((%ln::) - %ld)', olds, affected)
- }
+ roots = set(repo.revs(b'roots((%ld::) - %ld)', olds, affected))
if olds != roots:
self._updateroots(repo, phase, roots, tr)
# some roots may need to be declared for lower phases
@@ -636,7 +638,7 @@
if not dryrun:
# declare deleted root in the target phase
if targetphase != 0:
- self._retractboundary(repo, tr, targetphase, delroots)
+ self._retractboundary(repo, tr, targetphase, revs=delroots)
repo.invalidatevolatilesets()
return changes
@@ -651,21 +653,19 @@
else:
phasetracking = tr.changes.get(b'phases')
repo = repo.unfiltered()
- if (
- self._retractboundary(repo, tr, targetphase, nodes)
- and phasetracking is not None
- ):
+ retracted = self._retractboundary(repo, tr, targetphase, nodes)
+ if retracted and phasetracking is not None:
# find the affected revisions
new = self._phaseroots[targetphase]
old = oldroots[targetphase]
- affected = set(repo.revs(b'(%ln::) - (%ln::)', new, old))
+ affected = set(repo.revs(b'(%ld::) - (%ld::)', new, old))
# find the phase of the affected revision
for phase in range(targetphase, -1, -1):
if phase:
roots = oldroots.get(phase, [])
- revs = set(repo.revs(b'%ln::%ld', roots, affected))
+ revs = set(repo.revs(b'%ld::%ld', roots, affected))
affected -= revs
else: # public phase
revs = affected
@@ -673,11 +673,15 @@
_trackphasechange(phasetracking, r, phase, targetphase)
repo.invalidatevolatilesets()
- def _retractboundary(self, repo, tr, targetphase, nodes, revs=None):
+ def _retractboundary(self, repo, tr, targetphase, nodes=None, revs=None):
# Be careful to preserve shallow-copied values: do not update
# phaseroots values, replace them.
if revs is None:
revs = []
+ if nodes is None:
+ nodes = []
+ if not revs and not nodes:
+ return False
if (
targetphase == internal
and not supportinternal(repo)
@@ -688,10 +692,8 @@
msg = b'this repository does not support the %s phase' % name
raise error.ProgrammingError(msg)
- repo = repo.unfiltered()
- torev = repo.changelog.rev
- tonode = repo.changelog.node
- currentroots = {torev(node) for node in self._phaseroots[targetphase]}
+ torev = repo.changelog.index.rev
+ currentroots = self._phaseroots[targetphase]
finalroots = oldroots = set(currentroots)
newroots = [torev(node) for node in nodes] + [r for r in revs]
newroots = [
@@ -701,6 +703,8 @@
if newroots:
if nullrev in newroots:
raise error.Abort(_(b'cannot change null revision phase'))
+ # do not break the CoW assumption of the shallow copy
+ currentroots = currentroots.copy()
currentroots.update(newroots)
# Only compute new roots for revs above the roots that are being
@@ -712,18 +716,13 @@
finalroots = {rev for rev in currentroots if rev < minnewroot}
finalroots.update(updatedroots)
if finalroots != oldroots:
- self._updateroots(
- repo,
- targetphase,
- {tonode(rev) for rev in finalroots},
- tr,
- )
+ self._updateroots(repo, targetphase, finalroots, tr)
return True
return False
def register_strip(
self,
- repo: "localrepo.localrepository",
+ repo,
tr,
strip_rev: int,
):
@@ -731,12 +730,10 @@
Any roots higher than the stripped revision should be dropped.
"""
- assert repo.filtername is None
- to_rev = repo.changelog.index.rev
- for targetphase, nodes in list(self._phaseroots.items()):
- filtered = {n for n in nodes if to_rev(n) >= strip_rev}
+ for targetphase, roots in list(self._phaseroots.items()):
+ filtered = {r for r in roots if r >= strip_rev}
if filtered:
- self._updateroots(repo, targetphase, nodes - filtered, tr)
+ self._updateroots(repo, targetphase, roots - filtered, tr)
self.invalidate()
@@ -793,9 +790,10 @@
keys = util.sortdict()
value = b'%i' % draft
cl = repo.unfiltered().changelog
+ to_node = cl.node
for root in repo._phasecache._phaseroots[draft]:
- if repo._phasecache.phase(repo, cl.rev(root)) <= draft:
- keys[hex(root)] = value
+ if repo._phasecache.phase(repo, root) <= draft:
+ keys[hex(to_node(root))] = value
if repo.publishing():
# Add an extra data to let remote know we are a publishing
--- a/mercurial/repoview.py Tue Feb 20 21:40:08 2024 +0100
+++ b/mercurial/repoview.py Tue Feb 20 21:40:13 2024 +0100
@@ -160,7 +160,7 @@
firstmutable = len(cl)
roots = repo._phasecache.nonpublicphaseroots(repo)
if roots:
- firstmutable = min(firstmutable, min(cl.rev(r) for r in roots))
+ firstmutable = min(firstmutable, min(roots))
# protect from nullrev root
firstmutable = max(0, firstmutable)
return frozenset(range(firstmutable, len(cl)))
--- a/rust/hg-cpython/src/revlog.rs Tue Feb 20 21:40:08 2024 +0100
+++ b/rust/hg-cpython/src/revlog.rs Tue Feb 20 21:40:13 2024 +0100
@@ -970,31 +970,16 @@
py_roots: PyDict,
) -> PyResult<PyObject> {
let index = &*self.index(py).borrow();
- let opt = self.get_nodetree(py)?.borrow();
- let nt = opt.as_ref().unwrap();
let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots
.items_list(py)
.iter(py)
.map(|r| {
let phase = r.get_item(py, 0)?;
- let nodes = r.get_item(py, 1)?;
- // Transform the nodes from Python to revs here since we
- // have access to the nodemap
- let revs: Result<_, _> = nodes
- .iter(py)?
- .map(|node| match node?.extract::<PyBytes>(py) {
- Ok(py_bytes) => {
- let node = node_from_py_bytes(py, &py_bytes)?;
- nt.find_bin(index, node.into())
- .map_err(|e| nodemap_error(py, e))?
- .ok_or_else(|| revlog_error(py))
- }
- Err(e) => Err(e),
- })
- .collect();
+ let revs: Vec<_> =
+ rev_pyiter_collect(py, &r.get_item(py, 1)?, index)?;
let phase = Phase::try_from(phase.extract::<usize>(py)?)
.map_err(|_| revlog_error(py));
- Ok((phase?, revs?))
+ Ok((phase?, revs))
})
.collect();
let (len, phase_maps) = index
--- a/tests/test-debugcommands.t Tue Feb 20 21:40:08 2024 +0100
+++ b/tests/test-debugcommands.t Tue Feb 20 21:40:13 2024 +0100
@@ -197,7 +197,7 @@
node trie depth: 1
node trie last rev scanned: -1 (no-rust !)
node trie last rev scanned: 3 (rust !)
- node trie lookups: 4 (no-rust !)
+ node trie lookups: 3 (no-rust !)
node trie lookups: 2 (rust !)
node trie misses: 1
node trie splits: 1
--- a/tests/test-parseindex.t Tue Feb 20 21:40:08 2024 +0100
+++ b/tests/test-parseindex.t Tue Feb 20 21:40:13 2024 +0100
@@ -187,7 +187,7 @@
> ops = [
> ('reachableroots',
> lambda: cl.index.reachableroots2(0, [1], [0], False)),
- > ('compute_phases_map_sets', lambda: cl.computephases({1: {cl.node(0)}})),
+ > ('compute_phases_map_sets', lambda: cl.computephases({1: {0}})),
> ('index_headrevs', lambda: cl.headrevs()),
> ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)),
> ('find_deepest', lambda: cl.ancestor(n0, n1)),