Mercurial > hg
changeset 51406:f8bf1a8e9181
phases: keep internal state as rev-num instead of node-id
Node-id are expensive to work with, dealing with revision is much simple and
faster.
The fact we still used node-id here shows how few effort have been put into
making the phase logic fast. We tend to no longer use node-id internally for
about ten years.
This has a large impact of repository with many draft roots. For example this
Mozilla-try copy have ½ Million draft roots and `perf::unbundle` see a
significant improvement.
### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog
# benchmark.name = hg.perf.perf-unbundle
# bin-env-vars.hg.flavor = no-rust
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.issue6528 = disabled
# benchmark.variants.revs = last-1
before:: 1.746791 seconds
after:: 1.278379 seconds (-26.82%)
# benchmark.variants.revs = last-10
before:: 3.145774 seconds
after:: 2.103735 seconds (-33.13%)
# benchmark.variants.revs = last-100
before:: 3.487635 seconds
after:: 2.446749 seconds (-29.85%)
# benchmark.variants.revs = last-1000
before:: 5.007568 seconds
after:: 3.989923 seconds (-20.32%)
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 20 Feb 2024 21:40:13 +0100 |
parents | 12881244e48a |
children | 71ae6fee2b9d |
files | hgext/mq.py mercurial/cext/parsers.pyi mercurial/cext/revlog.c mercurial/phases.py mercurial/repoview.py rust/hg-cpython/src/revlog.rs tests/test-debugcommands.t tests/test-parseindex.t |
diffstat | 8 files changed, 48 insertions(+), 65 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/mq.py Tue Feb 20 21:40:08 2024 +0100 +++ b/hgext/mq.py Tue Feb 20 21:40:13 2024 +0100 @@ -4073,7 +4073,7 @@ else: mqphase = phases.draft qbase = repo[repo.mq.applied[0].node] - roots[mqphase].add(qbase.node()) + roots[mqphase].add(qbase.rev()) return roots
--- a/mercurial/cext/parsers.pyi Tue Feb 20 21:40:08 2024 +0100 +++ b/mercurial/cext/parsers.pyi Tue Feb 20 21:40:13 2024 +0100 @@ -58,7 +58,7 @@ def get_rev(self, value: bytes) -> Optional[int]: ... def has_node(self, value: Union[int, bytes]) -> bool: ... def rev(self, node: bytes) -> int: ... - def computephasesmapsets(self, root: Dict[int, Set[bytes]]) -> Tuple[int, Dict[int, Set[bytes]]]: ... + def computephasesmapsets(self, root: Dict[int, Set[int]]) -> Tuple[int, Dict[int, Set[bytes]]]: ... def reachableroots2(self, minroot: int, heads: List[int], roots: List[int], includepath: bool) -> List[int]: ... def headrevs(self, filteredrevs: Optional[List[int]]) -> List[int]: ... def headrevsfiltered(self, filteredrevs: Optional[List[int]]) -> List[int]: ...
--- a/mercurial/cext/revlog.c Tue Feb 20 21:40:08 2024 +0100 +++ b/mercurial/cext/revlog.c Tue Feb 20 21:40:13 2024 +0100 @@ -1081,7 +1081,6 @@ PyObject *item; PyObject *iterator; int rev, minrev = -1; - char *node; if (!PySet_Check(roots)) { PyErr_SetString(PyExc_TypeError, @@ -1092,9 +1091,10 @@ if (iterator == NULL) return -2; while ((item = PyIter_Next(iterator))) { - if (node_check(self->nodelen, item, &node) == -1) + rev = (int)PyLong_AsLong(item); + if (rev == -1 && PyErr_Occurred()) { goto failed; - rev = index_find_node(self, node); + } /* null is implicitly public, so negative is invalid */ if (rev < 0 || rev >= len) goto failed;
--- a/mercurial/phases.py Tue Feb 20 21:40:08 2024 +0100 +++ b/mercurial/phases.py Tue Feb 20 21:40:13 2024 +0100 @@ -133,7 +133,7 @@ util, ) -Phaseroots = Dict[int, Set[bytes]] +Phaseroots = Dict[int, Set[int]] if typing.TYPE_CHECKING: from . import ( @@ -210,7 +210,7 @@ repo = repo.unfiltered() dirty = False roots = {i: set() for i in allphases} - has_node = repo.changelog.index.has_node + to_rev = repo.changelog.index.get_rev unknown_msg = b'removing unknown node %s from %i-phase boundary\n' try: f, pending = txnutil.trypending(repo.root, repo.svfs, b'phaseroots') @@ -219,11 +219,12 @@ str_phase, hex_node = line.split() phase = int(str_phase) node = bin(hex_node) - if not has_node(node): + rev = to_rev(node) + if rev is None: repo.ui.debug(unknown_msg % (short(hex_node), phase)) dirty = True else: - roots[phase].add(node) + roots[phase].add(rev) finally: f.close() except FileNotFoundError: @@ -391,7 +392,7 @@ def nonpublicphaseroots( self, repo: "localrepo.localrepository" - ) -> Set[bytes]: + ) -> Set[int]: """returns the roots of all non-public phases The roots are not minimized, so if the secret revisions are @@ -499,7 +500,7 @@ self._phasesets = {phase: set() for phase in allphases} lowerroots = set() for phase in reversed(trackedphases): - roots = pycompat.maplist(cl.rev, self._phaseroots[phase]) + roots = self._phaseroots[phase] if roots: ps = set(cl.descendants(roots)) for root in roots: @@ -551,8 +552,10 @@ def _write(self, repo, fp): assert repo.filtername is None + to_node = repo.changelog.node for phase, roots in self._phaseroots.items(): - for h in sorted(roots): + for r in sorted(roots): + h = to_node(r) fp.write(b'%i %s\n' % (phase, hex(h))) self.dirty = False @@ -584,7 +587,7 @@ repo.invalidatevolatilesets() def advanceboundary( - self, repo, tr, targetphase, nodes, revs=None, dryrun=None + self, repo, tr, targetphase, nodes=None, revs=None, dryrun=None ): """Set all 'nodes' to phase 'targetphase' @@ -598,6 +601,8 @@ # phaseroots values, replace them. if revs is None: revs = [] + if not revs and not nodes: + return set() if tr is None: phasetracking = None else: @@ -616,7 +621,7 @@ olds = self._phaseroots[phase] - affected = repo.revs(b'%ln::%ld', olds, revs) + affected = repo.revs(b'%ld::%ld', olds, revs) changes.update(affected) if dryrun: continue @@ -625,10 +630,7 @@ phasetracking, r, self.phase(repo, r), targetphase ) - roots = { - ctx.node() - for ctx in repo.set(b'roots((%ln::) - %ld)', olds, affected) - } + roots = set(repo.revs(b'roots((%ld::) - %ld)', olds, affected)) if olds != roots: self._updateroots(repo, phase, roots, tr) # some roots may need to be declared for lower phases @@ -636,7 +638,7 @@ if not dryrun: # declare deleted root in the target phase if targetphase != 0: - self._retractboundary(repo, tr, targetphase, delroots) + self._retractboundary(repo, tr, targetphase, revs=delroots) repo.invalidatevolatilesets() return changes @@ -651,21 +653,19 @@ else: phasetracking = tr.changes.get(b'phases') repo = repo.unfiltered() - if ( - self._retractboundary(repo, tr, targetphase, nodes) - and phasetracking is not None - ): + retracted = self._retractboundary(repo, tr, targetphase, nodes) + if retracted and phasetracking is not None: # find the affected revisions new = self._phaseroots[targetphase] old = oldroots[targetphase] - affected = set(repo.revs(b'(%ln::) - (%ln::)', new, old)) + affected = set(repo.revs(b'(%ld::) - (%ld::)', new, old)) # find the phase of the affected revision for phase in range(targetphase, -1, -1): if phase: roots = oldroots.get(phase, []) - revs = set(repo.revs(b'%ln::%ld', roots, affected)) + revs = set(repo.revs(b'%ld::%ld', roots, affected)) affected -= revs else: # public phase revs = affected @@ -673,11 +673,15 @@ _trackphasechange(phasetracking, r, phase, targetphase) repo.invalidatevolatilesets() - def _retractboundary(self, repo, tr, targetphase, nodes, revs=None): + def _retractboundary(self, repo, tr, targetphase, nodes=None, revs=None): # Be careful to preserve shallow-copied values: do not update # phaseroots values, replace them. if revs is None: revs = [] + if nodes is None: + nodes = [] + if not revs and not nodes: + return False if ( targetphase == internal and not supportinternal(repo) @@ -688,10 +692,8 @@ msg = b'this repository does not support the %s phase' % name raise error.ProgrammingError(msg) - repo = repo.unfiltered() - torev = repo.changelog.rev - tonode = repo.changelog.node - currentroots = {torev(node) for node in self._phaseroots[targetphase]} + torev = repo.changelog.index.rev + currentroots = self._phaseroots[targetphase] finalroots = oldroots = set(currentroots) newroots = [torev(node) for node in nodes] + [r for r in revs] newroots = [ @@ -701,6 +703,8 @@ if newroots: if nullrev in newroots: raise error.Abort(_(b'cannot change null revision phase')) + # do not break the CoW assumption of the shallow copy + currentroots = currentroots.copy() currentroots.update(newroots) # Only compute new roots for revs above the roots that are being @@ -712,18 +716,13 @@ finalroots = {rev for rev in currentroots if rev < minnewroot} finalroots.update(updatedroots) if finalroots != oldroots: - self._updateroots( - repo, - targetphase, - {tonode(rev) for rev in finalroots}, - tr, - ) + self._updateroots(repo, targetphase, finalroots, tr) return True return False def register_strip( self, - repo: "localrepo.localrepository", + repo, tr, strip_rev: int, ): @@ -731,12 +730,10 @@ Any roots higher than the stripped revision should be dropped. """ - assert repo.filtername is None - to_rev = repo.changelog.index.rev - for targetphase, nodes in list(self._phaseroots.items()): - filtered = {n for n in nodes if to_rev(n) >= strip_rev} + for targetphase, roots in list(self._phaseroots.items()): + filtered = {r for r in roots if r >= strip_rev} if filtered: - self._updateroots(repo, targetphase, nodes - filtered, tr) + self._updateroots(repo, targetphase, roots - filtered, tr) self.invalidate() @@ -793,9 +790,10 @@ keys = util.sortdict() value = b'%i' % draft cl = repo.unfiltered().changelog + to_node = cl.node for root in repo._phasecache._phaseroots[draft]: - if repo._phasecache.phase(repo, cl.rev(root)) <= draft: - keys[hex(root)] = value + if repo._phasecache.phase(repo, root) <= draft: + keys[hex(to_node(root))] = value if repo.publishing(): # Add an extra data to let remote know we are a publishing
--- a/mercurial/repoview.py Tue Feb 20 21:40:08 2024 +0100 +++ b/mercurial/repoview.py Tue Feb 20 21:40:13 2024 +0100 @@ -160,7 +160,7 @@ firstmutable = len(cl) roots = repo._phasecache.nonpublicphaseroots(repo) if roots: - firstmutable = min(firstmutable, min(cl.rev(r) for r in roots)) + firstmutable = min(firstmutable, min(roots)) # protect from nullrev root firstmutable = max(0, firstmutable) return frozenset(range(firstmutable, len(cl)))
--- a/rust/hg-cpython/src/revlog.rs Tue Feb 20 21:40:08 2024 +0100 +++ b/rust/hg-cpython/src/revlog.rs Tue Feb 20 21:40:13 2024 +0100 @@ -970,31 +970,16 @@ py_roots: PyDict, ) -> PyResult<PyObject> { let index = &*self.index(py).borrow(); - let opt = self.get_nodetree(py)?.borrow(); - let nt = opt.as_ref().unwrap(); let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots .items_list(py) .iter(py) .map(|r| { let phase = r.get_item(py, 0)?; - let nodes = r.get_item(py, 1)?; - // Transform the nodes from Python to revs here since we - // have access to the nodemap - let revs: Result<_, _> = nodes - .iter(py)? - .map(|node| match node?.extract::<PyBytes>(py) { - Ok(py_bytes) => { - let node = node_from_py_bytes(py, &py_bytes)?; - nt.find_bin(index, node.into()) - .map_err(|e| nodemap_error(py, e))? - .ok_or_else(|| revlog_error(py)) - } - Err(e) => Err(e), - }) - .collect(); + let revs: Vec<_> = + rev_pyiter_collect(py, &r.get_item(py, 1)?, index)?; let phase = Phase::try_from(phase.extract::<usize>(py)?) .map_err(|_| revlog_error(py)); - Ok((phase?, revs?)) + Ok((phase?, revs)) }) .collect(); let (len, phase_maps) = index
--- a/tests/test-debugcommands.t Tue Feb 20 21:40:08 2024 +0100 +++ b/tests/test-debugcommands.t Tue Feb 20 21:40:13 2024 +0100 @@ -197,7 +197,7 @@ node trie depth: 1 node trie last rev scanned: -1 (no-rust !) node trie last rev scanned: 3 (rust !) - node trie lookups: 4 (no-rust !) + node trie lookups: 3 (no-rust !) node trie lookups: 2 (rust !) node trie misses: 1 node trie splits: 1
--- a/tests/test-parseindex.t Tue Feb 20 21:40:08 2024 +0100 +++ b/tests/test-parseindex.t Tue Feb 20 21:40:13 2024 +0100 @@ -187,7 +187,7 @@ > ops = [ > ('reachableroots', > lambda: cl.index.reachableroots2(0, [1], [0], False)), - > ('compute_phases_map_sets', lambda: cl.computephases({1: {cl.node(0)}})), + > ('compute_phases_map_sets', lambda: cl.computephases({1: {0}})), > ('index_headrevs', lambda: cl.headrevs()), > ('find_gca_candidates', lambda: cl.commonancestorsheads(n0, n1)), > ('find_deepest', lambda: cl.ancestor(n0, n1)),