Mercurial > hg-stable
changeset 51371:508fd40dc86a
branching: merge stable into default
author:    Raphaël Gomès <rgomes@octobus.net>
date:      Mon, 12 Feb 2024 16:22:47 +0100
parents:   99869dcf3ba0 (diff), 5f62d45e5289 (current diff)
children:  0d414fb8336f
files:     mercurial/commands.py rust/hg-core/src/revlog/changelog.rs tests/test-censor.t
diffstat:  153 files changed, 6140 insertions(+), 2235 deletions(-)
--- a/Makefile	Mon Feb 12 16:17:08 2024 +0100
+++ b/Makefile	Mon Feb 12 16:22:47 2024 +0100
@@ -296,10 +296,12 @@
 	rm -rf $(PYOX_DIR)/doc
 	cp -a doc $(PYOX_DIR)/doc
 
+pytype-docker:
+	contrib/docker/pytype/recipe.sh
 
 .PHONY: help all local build doc cleanbutpackages clean install install-bin \
 	install-doc install-home install-home-bin install-home-doc \
 	dist dist-notests check tests rust-tests check-code format-c \
 	update-pot pyoxidizer pyoxidizer-windows-tests pyoxidizer-macos-tests \
 	$(packaging_targets) \
-	osx
+	osx pytype-docker
--- a/contrib/check-pytype.sh	Mon Feb 12 16:17:08 2024 +0100
+++ b/contrib/check-pytype.sh	Mon Feb 12 16:22:47 2024 +0100
@@ -3,7 +3,7 @@
 set -e
 set -u
 
-cd `hg root`
+cd "$(hg root)"
 
 # Many of the individual files that are excluded here confuse pytype
 # because they do a mix of Python 2 and Python 3 things
@@ -71,7 +71,7 @@
 
 # TODO: include hgext and hgext3rd
 
-pytype -V 3.7 --keep-going --jobs auto \
+pytype --keep-going --jobs auto \
     doc/check-seclevel.py hgdemandimport hgext mercurial \
     -x hgext/absorb.py \
     -x hgext/bugzilla.py \
@@ -127,5 +127,7 @@
     -x mercurial/wireprotov1peer.py \
     -x mercurial/wireprotov1server.py
 
-echo 'pytype crashed while generating the following type stubs:'
-find .pytype/pyi -name '*.pyi' | xargs grep -l '# Caught error' | sort
+if find .pytype/pyi -name '*.pyi' | xargs grep -ql '# Caught error'; then
+    echo 'pytype crashed while generating the following type stubs:'
+    find .pytype/pyi -name '*.pyi' | xargs grep -l '# Caught error' | sort
+fi
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/docker/pytype/Dockerfile	Mon Feb 12 16:22:47 2024 +0100
@@ -0,0 +1,14 @@
+FROM registry.heptapod.net/mercurial/ci-images/mercurial-core:v2.0
+
+USER ci-runner
+
+ENV PATH=/home/ci-runner/.local/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
+ENV PYTHONPATH=/home/ci-runner/.local/lib/python3.11/site-packages
+
+RUN python3 -m pip install --user --break-system-packages --upgrade pytype==2023.11.21
+
+ADD --chown=ci-runner entrypoint.sh /home/ci-runner/entrypoint.sh
+
+RUN chmod -R a=rwX /home/ci-runner/.local/ /home/ci-runner/entrypoint.sh
+
+CMD /home/ci-runner/entrypoint.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/docker/pytype/entrypoint.sh	Mon Feb 12 16:22:47 2024 +0100
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+cd /tmp/mercurial-ci/
+make local
+./contrib/setup-pytype.sh
+./contrib/check-pytype.sh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/contrib/docker/pytype/recipe.sh	Mon Feb 12 16:22:47 2024 +0100
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# find repo-root without calling hg as this might be run with sudo
+THIS="$(readlink -m "$0")"
+HERE="$(dirname "$THIS")"
+HG_ROOT="$(readlink -m "$HERE"/../../..)"
+echo source mercurial repository: "$HG_ROOT"
+
+# find actual user as this might be run with sudo
+if [ -n "$SUDO_UID" ]; then
+    ACTUAL_UID="$SUDO_UID"
+else
+    ACTUAL_UID="$(id -u)"
+fi
+if [ -n "$SUDO_GID" ]; then
+    ACTUAL_GID="$SUDO_GID"
+else
+    ACTUAL_GID="$(id -g)"
+fi
+echo using user "$ACTUAL_UID:$ACTUAL_GID"
+if groups | egrep -q '\<(docker|root)\>' ; then
+    env DOCKER_BUILDKIT=1 docker build --tag mercurial-pytype-checker "$HERE"
+    docker run --rm -it --user "$ACTUAL_UID:$ACTUAL_GID" -v "$HG_ROOT:/tmp/mercurial-ci" mercurial-pytype-checker
+else
+    echo "user not in the docker group" >&2
+    echo "(consider running this with \`sudo\`)" >&2
+    exit 255
+fi
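
The three new files above wire a reproducible, containerized pytype run into the build. A minimal invocation sketch, assuming Docker is installed (`pytype-docker` is the Makefile target added earlier in this changeset):

    # from the repository root, via the new Makefile target
    make pytype-docker

    # or call the recipe directly; it detects sudo and runs the
    # container as the invoking user
    sudo contrib/docker/pytype/recipe.sh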
--- a/hgext/censor.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/censor.py	Mon Feb 12 16:22:47 2024 +0100
@@ -22,7 +22,9 @@
 simply fail when asked to produce censored data. Others, like ``hg verify``
 and ``hg update``, must be capable of tolerating censored data to continue to
 function in a meaningful way. Such commands only tolerate censored file
-revisions if they are allowed by the "censor.policy=ignore" config option.
+As having a censored version in a checkout is impractical. The current head
+revisions of the repository are checked. If the revision to be censored is in
+any of them the command will abort.
 
 A few informative commands such as ``hg grep`` will unconditionally
 ignore censored data and merely report that it was encountered.
@@ -34,7 +36,6 @@
 
 from mercurial import (
     error,
-    logcmdutil,
     registrar,
     scmutil,
 )
@@ -54,25 +55,39 @@
     (
         b'r',
         b'rev',
-        b'',
+        [],
         _(b'censor file from specified revision'),
         _(b'REV'),
     ),
+    (
+        b'',
+        b'check-heads',
+        True,
+        _(b'check that repository heads are not affected'),
+    ),
     (b't', b'tombstone', b'', _(b'replacement tombstone data'), _(b'TEXT')),
     ],
     _(b'-r REV [-t TEXT] [FILE]'),
     helpcategory=command.CATEGORY_MAINTENANCE,
 )
-def censor(ui, repo, path, rev=b'', tombstone=b'', **opts):
+def censor(ui, repo, path, rev=(), tombstone=b'', check_heads=True, **opts):
     with repo.wlock(), repo.lock():
-        return _docensor(ui, repo, path, rev, tombstone, **opts)
+        return _docensor(
+            ui,
+            repo,
+            path,
+            rev,
+            tombstone,
+            check_heads=check_heads,
+            **opts,
+        )
 
 
-def _docensor(ui, repo, path, rev=b'', tombstone=b'', **opts):
+def _docensor(ui, repo, path, revs=(), tombstone=b'', check_heads=True, **opts):
     if not path:
         raise error.Abort(_(b'must specify file path to censor'))
-    if not rev:
-        raise error.Abort(_(b'must specify revision to censor'))
+    if not revs:
+        raise error.Abort(_(b'must specify revisions to censor'))
 
     wctx = repo[None]
@@ -84,30 +99,36 @@
     if not len(flog):
         raise error.Abort(_(b'cannot censor file with no history'))
 
-    rev = logcmdutil.revsingle(repo, rev, rev).rev()
-    try:
-        ctx = repo[rev]
-    except KeyError:
-        raise error.Abort(_(b'invalid revision identifier %s') % rev)
-
-    try:
-        fctx = ctx.filectx(path)
-    except error.LookupError:
-        raise error.Abort(_(b'file does not exist at revision %s') % rev)
+    revs = scmutil.revrange(repo, revs)
+    if not revs:
+        raise error.Abort(_(b'no matching revisions'))
+    file_nodes = set()
+    for r in revs:
+        try:
+            ctx = repo[r]
+            file_nodes.add(ctx.filectx(path).filenode())
+        except error.LookupError:
+            raise error.Abort(_(b'file does not exist at revision %s') % ctx)
 
-    fnode = fctx.filenode()
-    heads = []
-    for headnode in repo.heads():
-        hc = repo[headnode]
-        if path in hc and hc.filenode(path) == fnode:
-            heads.append(hc)
-    if heads:
-        headlist = b', '.join([short(c.node()) for c in heads])
-        raise error.Abort(
-            _(b'cannot censor file in heads (%s)') % headlist,
-            hint=_(b'clean/delete and commit first'),
-        )
+    if check_heads:
+        heads = []
+        repo_heads = repo.heads()
+        msg = b'checking for the censored content in %d heads\n'
+        msg %= len(repo_heads)
+        ui.status(msg)
+        for headnode in repo_heads:
+            hc = repo[headnode]
+            if path in hc and hc.filenode(path) in file_nodes:
+                heads.append(hc)
+        if heads:
+            headlist = b', '.join([short(c.node()) for c in heads])
+            raise error.Abort(
+                _(b'cannot censor file in heads (%s)') % headlist,
+                hint=_(b'clean/delete and commit first'),
+            )
 
+    msg = b'checking for the censored content in the working directory\n'
+    ui.status(msg)
     wp = wctx.parents()
     if ctx.node() in [p.node() for p in wp]:
         raise error.Abort(
@@ -115,5 +136,8 @@
             hint=_(b'clean/delete/update first'),
         )
 
+    msg = b'censoring %d file revisions\n'
+    msg %= len(file_nodes)
+    ui.status(msg)
     with repo.transaction(b'censor') as tr:
-        flog.censorrevision(tr, fnode, tombstone=tombstone)
+        flog.censorrevision(tr, file_nodes, tombstone=tombstone)
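
With `--rev` now cumulative and the head check made optional, the extended command line looks roughly like this (hypothetical revisions, file name, and tombstone text; `--no-check-heads` is the negated form of the new boolean flag):

    # censor the same file in several revisions in one transaction
    hg censor -r 1e88685f5dde -r 4dd84eb38d37 -t "leaked secret" src/creds.ini

    # skip the scan of repository heads
    hg censor --no-check-heads -r 1e88685f5dde -t "leaked secret" src/creds.ini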
--- a/hgext/git/dirstate.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/git/dirstate.py	Mon Feb 12 16:22:47 2024 +0100
@@ -389,7 +389,7 @@
         # TODO: should this be added to the dirstate interface?
         self._plchangecallbacks[category] = callback
 
-    def setbranch(self, branch, transaction=None):
+    def setbranch(self, branch, transaction):
         raise error.Abort(
             b'git repos do not support branches. try using bookmarks'
         )
--- a/hgext/git/gitlog.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/git/gitlog.py	Mon Feb 12 16:22:47 2024 +0100
@@ -324,7 +324,7 @@
         if common is None:
             common = [sha1nodeconstants.nullid]
         if heads is None:
-            heads = self.heads()
+            heads = [self.node(r) for r in self.headrevs()]
 
         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]
--- a/hgext/narrow/narrowcommands.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/narrow/narrowcommands.py	Mon Feb 12 16:22:47 2024 +0100
@@ -296,7 +296,7 @@
             for file_ in entry.files():
                 todelete.append(file_.unencoded_path)
         elif entry.is_manifestlog:
-            dir = entry.target_id
+            dir = entry.target_id[:-1]
             dirs = sorted(pathutil.dirs({dir})) + [dir]
             include = True
             for d in dirs:
--- a/hgext/remotefilelog/basepack.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/remotefilelog/basepack.py	Mon Feb 12 16:22:47 2024 +0100
@@ -501,7 +501,7 @@
             self.idxfp.write(rawindex)
         self.idxfp.close()
 
-    def createindex(self, nodelocations):
+    def createindex(self, nodelocations, indexoffset):
         raise NotImplementedError()
 
     def _writeheader(self, indexparams):
--- a/hgext/remotefilelog/remotefilelog.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/remotefilelog/remotefilelog.py	Mon Feb 12 16:22:47 2024 +0100
@@ -7,7 +7,6 @@
 # GNU General Public License version 2 or any later version.
 
 import collections
-import os
 
 from mercurial.node import bin
 from mercurial.i18n import _
@@ -22,7 +21,6 @@
 
 from . import (
     constants,
-    fileserverclient,
     shallowutil,
 )
 
@@ -387,33 +385,6 @@
     def rawdata(self, node):
        return self.revision(node, raw=False)
 
-    def _read(self, id):
-        """reads the raw file blob from disk, cache, or server"""
-        fileservice = self.repo.fileservice
-        localcache = fileservice.localcache
-        cachekey = fileserverclient.getcachekey(
-            self.repo.name, self.filename, id
-        )
-        try:
-            return localcache.read(cachekey)
-        except KeyError:
-            pass
-
-        localkey = fileserverclient.getlocalkey(self.filename, id)
-        localpath = os.path.join(self.localpath, localkey)
-        try:
-            return shallowutil.readfile(localpath)
-        except IOError:
-            pass
-
-        fileservice.prefetch([(self.filename, id)])
-        try:
-            return localcache.read(cachekey)
-        except KeyError:
-            pass
-
-        raise error.LookupError(id, self.filename, _(b'no node'))
-
     def ancestormap(self, node):
         return self.repo.metadatastore.getancestors(self.filename, node)
--- a/hgext/remotenames.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/remotenames.py	Mon Feb 12 16:22:47 2024 +0100
@@ -134,10 +134,10 @@
     def __len__(self):
         return len(self.potentialentries)
 
-    def __setitem__(self):
+    def __setitem__(self, k, v):
         raise NotImplementedError
 
-    def __delitem__(self):
+    def __delitem__(self, k):
         raise NotImplementedError
 
     def _fetchandcache(self, key):
--- a/hgext/sparse.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/sparse.py	Mon Feb 12 16:22:47 2024 +0100
@@ -371,8 +371,7 @@
         sparse.clearrules(repo, force=force)
 
     if refresh:
-        try:
-            wlock = repo.wlock()
+        with repo.wlock():
             fcounts = pycompat.maplist(
                 len,
                 sparse.refreshwdir(
@@ -386,7 +385,5 @@
                 dropped=fcounts[1],
                 conflicting=fcounts[2],
             )
-        finally:
-            wlock.release()
 
     del repo._has_sparse
--- a/hgext/sqlitestore.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/hgext/sqlitestore.py	Mon Feb 12 16:22:47 2024 +0100
@@ -810,7 +810,11 @@
 
         return not empty
 
-    def censorrevision(self, tr, censornode, tombstone=b''):
+    def censorrevision(self, tr, censor_nodes, tombstone=b''):
+        for node in censor_nodes:
+            self._censor_one_revision(tr, node, tombstone=tombstone)
+
+    def _censor_one_revision(self, tr, censornode, tombstone):
         tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
 
         # This restriction is cargo culted from revlogs and makes no sense for
--- a/mercurial/branchmap.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/branchmap.py	Mon Feb 12 16:22:47 2024 +0100
@@ -13,47 +13,36 @@
     hex,
     nullrev,
 )
+
+from typing import (
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Optional,
+    Set,
+    TYPE_CHECKING,
+    Tuple,
+    Union,
+)
+
 from . import (
     encoding,
     error,
     obsolete,
-    pycompat,
     scmutil,
     util,
 )
+
 from .utils import (
     repoviewutil,
     stringutil,
 )
 
-if pycompat.TYPE_CHECKING:
-    from typing import (
-        Any,
-        Callable,
-        Dict,
-        Iterable,
-        List,
-        Optional,
-        Set,
-        Tuple,
-        Union,
-    )
+if TYPE_CHECKING:
     from . import localrepo
 
-    assert any(
-        (
-            Any,
-            Callable,
-            Dict,
-            Iterable,
-            List,
-            Optional,
-            Set,
-            Tuple,
-            Union,
-            localrepo,
-        )
-    )
+    assert [localrepo]
 
 subsettable = repoviewutil.subsettable
@@ -193,15 +182,16 @@
 
     def __init__(
         self,
-        repo,
-        entries=(),
-        tipnode=None,
-        tiprev=nullrev,
-        filteredhash=None,
-        closednodes=None,
-        hasnode=None,
-    ):
-        # type: (localrepo.localrepository, Union[Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]], bytes, int, Optional[bytes], Optional[Set[bytes]], Optional[Callable[[bytes], bool]]) -> None
+        repo: "localrepo.localrepository",
+        entries: Union[
+            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
+        ] = (),
+        tipnode: Optional[bytes] = None,
+        tiprev: Optional[int] = nullrev,
+        filteredhash: Optional[bytes] = None,
+        closednodes: Optional[Set[bytes]] = None,
+        hasnode: Optional[Callable[[bytes], bool]] = None,
+    ) -> None:
         """hasnode is a function which can be used to verify whether changelog
         has a given node or not. If it's not provided, we assume that every node
         we have exists in changelog"""
--- a/mercurial/cext/revlog.c	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/cext/revlog.c	Mon Feb 12 16:22:47 2024 +0100
@@ -3037,7 +3037,7 @@
 	self->offsets = NULL;
 	self->nodelen = 20;
 	self->nullentry = NULL;
-	self->rust_ext_compat = 1;
+	self->rust_ext_compat = 0;
 	self->format_version = format_v1;
 
 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|l", kwlist,
@@ -3055,6 +3055,7 @@
 	}
 
 	if (self->format_version == format_v1) {
+		self->rust_ext_compat = 1;
 		self->entry_size = v1_entry_size;
 	} else if (self->format_version == format_v2) {
 		self->entry_size = v2_entry_size;
--- a/mercurial/changelog.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/changelog.py	Mon Feb 12 16:22:47 2024 +0100
@@ -308,6 +308,7 @@
             persistentnodemap=opener.options.get(b'persistent-nodemap', False),
             concurrencychecker=concurrencychecker,
             trypending=trypending,
+            may_inline=False,
         )
 
         if self._initempty and (self._format_version == revlog.REVLOGV1):
@@ -344,6 +345,11 @@
     def delayupdate(self, tr):
         """delay visibility of index updates to other readers"""
         assert not self._inner.is_open
+        assert not self._may_inline
+        # enforce that older changelog that are still inline are split at the
+        # first opportunity.
+        if self._inline:
+            self._enforceinlinesize(tr)
         if self._docket is not None:
             self._v2_delayed = True
         else:
@@ -363,8 +369,9 @@
             else:
                 new_index_file = self._inner.finalize_pending()
                 self._indexfile = new_index_file
-                # split when we're done
-                self._enforceinlinesize(tr, side_write=False)
+                if self._inline:
+                    msg = 'changelog should not be inline at that point'
+                    raise error.ProgrammingError(msg)
 
     def _writepending(self, tr):
         """create a file containing the unfinalized state for
@@ -380,9 +387,9 @@
                 tr.registertmp(new_index)
         return any_pending
 
-    def _enforceinlinesize(self, tr, side_write=True):
+    def _enforceinlinesize(self, tr):
         if not self.is_delaying:
-            revlog.revlog._enforceinlinesize(self, tr, side_write=side_write)
+            revlog.revlog._enforceinlinesize(self, tr)
 
     def read(self, nodeorrev):
         """Obtain data from a parsed changelog revision.
--- a/mercurial/chgserver.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/chgserver.py	Mon Feb 12 16:22:47 2024 +0100
@@ -48,6 +48,10 @@
 import struct
 import time
 
+from typing import (
+    Optional,
+)
+
 from .i18n import _
 from .node import hex
 
@@ -628,14 +632,16 @@
 
     pollinterval = 1  # [sec]
 
+    _hashstate: Optional[hashstate]
+    _baseaddress: Optional[bytes]
+    _realaddress: Optional[bytes]
+
    def __init__(self, ui):
         self.ui = ui
 
-        # TODO: use PEP 526 syntax (`_hashstate: hashstate` at the class level)
-        # when 3.5 support is dropped.
-        self._hashstate = None  # type: hashstate
-        self._baseaddress = None  # type: bytes
-        self._realaddress = None  # type: bytes
+        self._hashstate = None
+        self._baseaddress = None
+        self._realaddress = None
 
         self._idletimeout = ui.configint(b'chgserver', b'idletimeout')
         self._lastactive = time.time()
--- a/mercurial/cmdutil.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/cmdutil.py	Mon Feb 12 16:22:47 2024 +0100
@@ -18,6 +18,7 @@
     Dict,
     Iterable,
     Optional,
+    TYPE_CHECKING,
     cast,
 )
 
@@ -71,7 +72,7 @@
     constants as revlog_constants,
 )
 
-if pycompat.TYPE_CHECKING:
+if TYPE_CHECKING:
     from . import (
         ui as uimod,
     )
@@ -2381,8 +2382,19 @@
             full=False,
         )
     ):
+        entry = dirstate.get_entry(f)
+        # We don't want to even attmpt to add back files that have been removed
+        # It would lead to a misleading message saying we're adding the path,
+        # and can also lead to file/dir conflicts when attempting to add it.
+        removed = entry and entry.removed
         exact = match.exact(f)
-        if exact or not explicitonly and f not in wctx and repo.wvfs.lexists(f):
+        if (
+            exact
+            or not explicitonly
+            and f not in wctx
+            and repo.wvfs.lexists(f)
+            and not removed
+        ):
             if cca:
                 cca(f)
             names.append(f)
@@ -4106,8 +4118,10 @@
     return 0
 
 
-def readgraftstate(repo, graftstate):
-    # type: (Any, statemod.cmdstate) -> Dict[bytes, Any]
+def readgraftstate(
+    repo: Any,
+    graftstate: statemod.cmdstate,
+) -> Dict[bytes, Any]:
     """read the graft state file and return a dict of the data stored in it"""
     try:
         return graftstate.read()
--- a/mercurial/commands.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/commands.py	Mon Feb 12 16:22:47 2024 +0100
@@ -29,6 +29,7 @@
     copies,
     debugcommands as debugcommandsmod,
     destutil,
+    diffutil,
     discovery,
     encoding,
     error,
@@ -2660,7 +2661,7 @@
     if change:
         repo = scmutil.unhidehashlikerevs(repo, [change], b'nowarn')
         ctx2 = logcmdutil.revsingle(repo, change, None)
-        ctx1 = logcmdutil.diff_parent(ctx2)
+        ctx1 = diffutil.diff_parent(ctx2)
     elif from_rev or to_rev:
         repo = scmutil.unhidehashlikerevs(
             repo, [from_rev] + [to_rev], b'nowarn'
--- a/mercurial/config.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/config.py	Mon Feb 12 16:22:47 2024 +0100
@@ -9,6 +9,11 @@
 import errno
 import os
 
+from typing import (
+    List,
+    Tuple,
+)
+
 from .i18n import _
 from . import (
     encoding,
@@ -107,7 +112,7 @@
     def sections(self):
         return sorted(self._data.keys())
 
-    def items(self, section):
+    def items(self, section: bytes) -> List[Tuple[bytes, bytes]]:
         items = self._data.get(section, {}).items()
         return [(k, v[0]) for (k, v) in items]
--- a/mercurial/configitems.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/configitems.py	Mon Feb 12 16:22:47 2024 +0100
@@ -82,7 +82,7 @@
         super(itemregister, self).__init__()
         self._generics = set()
 
-    def update(self, other):
+    def update(self, other):  # pytype: disable=signature-mismatch
         super(itemregister, self).update(other)
         self._generics.update(other._generics)
--- a/mercurial/configitems.toml	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/configitems.toml	Mon Feb 12 16:22:47 2024 +0100
@@ -1111,28 +1111,6 @@
 
 [[items]]
 section = "experimental"
-name = "revlog.uncompressed-cache.enabled"
-default = true
-experimental = true
-documentation = """Enable some caching of uncompressed chunk, greatly boosting
-performance at the cost of memory usage."""
-
-[[items]]
-section = "experimental"
-name = "revlog.uncompressed-cache.factor"
-default = 4
-experimental = true
-documentation = """The size of the cache compared to the largest revision seen."""
-
-[[items]]
-section = "experimental"
-name = "revlog.uncompressed-cache.count"
-default = 10000
-experimental = true
-documentation = """The number of chunk cached."""
-
-[[items]]
-section = "experimental"
 name = "stream-v3"
 default = false
 
@@ -2489,6 +2467,76 @@
 default = false
 
 [[items]]
+section = "usage"
+name = "repository-role"
+default = "default"
+documentation = """What this repository is used for.
+
+This is used to adjust behavior and performance to best fit the repository purpose.
+
+Currently recognised values are:
+- default: an all purpose repository
+"""
+
+[[items]]
+section = "usage"
+name = "resources"
+default = "default"
+documentation = """How aggressive Mercurial can be with resource usage:
+
+Currently recognised values are:
+- default: the default value currently is equivalent to medium,
+- high: allows for higher cpu, memory and disk-space usage to improve the performance of some operations.
+- medium: aims at a moderate resource usage,
+- low: reduces resources usage when possible, decreasing overall performance.
+
+For finer configuration, see also `usage.resources.cpu`,
+`usage.resources.disk` and `usage.resources.memory`.
+"""
+
+[[items]]
+section = "usage"
+name = "resources.cpu"
+default = "default"
+documentation = """How aggressive Mercurial can be in terms of cpu usage:
+
+Currently recognised values are:
+- default: the default value, inherits the value from `usage.resources`,
+- high: allows for more aggressive cpu usage, improving storage quality and
+  the performance of some operations at the expense of machine load
+- medium: aims at a moderate cpu usage,
+- low: reduces cpu usage when possible, potentially at the expense of
+  slower operations, increased storage and exchange payload.
+
+"""
+
+[[items]]
+section = "usage"
+name = "resources.disk"
+default = "default"
+documentation = """How aggressive Mercurial can be in terms of disk usage:
+
+Currently recognised values are:
+- default: the default value, inherits the value from `usage.resources`,
+- high: allows for more disk space usage where it can improve the performance,
+- medium: aims at a moderate disk usage,
+- low: reduces disk usage when possible, decreasing performance in some occasion.
+"""
+
+[[items]]
+section = "usage"
+name = "resources.memory"
+default = "default"
+documentation = """How aggressive Mercurial can be in terms of memory usage:
+
+Currently recognised values are:
+- default: the default value, inherits the value from `usage.resources`,
+- high: allows for more aggressive memory usage to improve overall performance,
+- medium: aims at a moderate memory usage,
+- low: reduces memory usage when possible at the cost of overall performance.
+"""
+
+[[items]]
 section = "verify"
 name = "skipflags"
 default = 0
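
For illustration, the new items are set like any other config option; a sketch in hgrc syntax, with values taken from the documentation above:

    # append to a user or repository configuration
    cat >> ~/.hgrc <<'EOF'
    [usage]
    # trade cpu/memory/disk for speed across the board ...
    resources = high
    # ... but keep memory consumption moderate
    resources.memory = medium
    EOF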
--- a/mercurial/debugcommands.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/debugcommands.py	Mon Feb 12 16:22:47 2024 +0100
@@ -1860,7 +1860,7 @@
     repo.changelog.shortest(repo.nullid, 1)
     index = repo.changelog.index
     if not hasattr(index, 'stats'):
-        raise error.Abort(_(b'debugindexstats only works with native code'))
+        raise error.Abort(_(b'debugindexstats only works with native C code'))
     for k, v in sorted(index.stats().items()):
         ui.write(b'%s: %d\n' % (k, v))
--- a/mercurial/diffutil.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/diffutil.py	Mon Feb 12 16:22:47 2024 +0100
@@ -16,6 +16,7 @@
 )
 
 from .i18n import _
+from .node import nullrev
 
 from . import (
     mdiff,
@@ -155,3 +156,35 @@
     )
 
     return mdiff.diffopts(**pycompat.strkwargs(buildopts))
+
+
+def diff_parent(ctx):
+    """get the context object to use as parent when diffing
+
+
+    If diff.merge is enabled, an overlayworkingctx of the auto-merged parents will be returned.
+    """
+    repo = ctx.repo()
+    if repo.ui.configbool(b"diff", b"merge") and ctx.p2().rev() != nullrev:
+        # avoid circular import
+        from . import (
+            context,
+            merge,
+        )
+
+        wctx = context.overlayworkingctx(repo)
+        wctx.setbase(ctx.p1())
+        with repo.ui.configoverride(
+            {
+                (
+                    b"ui",
+                    b"forcemerge",
+                ): b"internal:merge3-lie-about-conflicts",
+            },
+            b"merge-diff",
+        ):
+            with repo.ui.silent():
+                merge.merge(ctx.p2(), wc=wctx)
+        return wctx
+    else:
+        return ctx.p1()
--- a/mercurial/dirstate.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/dirstate.py	Mon Feb 12 16:22:47 2024 +0100
@@ -42,9 +42,6 @@
 parsers = policy.importmod('parsers')
 rustmod = policy.importrust('dirstate')
 
-# use to detect lack of a parameter
-SENTINEL = object()
-
 HAS_FAST_DIRSTATE_V2 = rustmod is not None
 
 propertycache = util.propertycache
@@ -408,16 +405,6 @@
         """
         return self._changing_level > 0
 
-    def pendingparentchange(self):
-        return self.is_changing_parent()
-
-    def is_changing_parent(self):
-        """Returns true if the dirstate is in the middle of a set of changes
-        that modify the dirstate parent.
-        """
-        self._ui.deprecwarn(b"dirstate.is_changing_parents", b"6.5")
-        return self.is_changing_parents
-
     @property
     def is_changing_parents(self):
         """Returns true if the dirstate is in the middle of a set of changes
@@ -670,12 +657,8 @@
             fold_p2 = oldp2 != nullid and p2 == nullid
         return self._map.setparents(p1, p2, fold_p2=fold_p2)
 
-    def setbranch(self, branch, transaction=SENTINEL):
+    def setbranch(self, branch, transaction):
         self.__class__._branch.set(self, encoding.fromlocal(branch))
-        if transaction is SENTINEL:
-            msg = b"setbranch needs a `transaction` argument"
-            self._ui.deprecwarn(msg, b'6.5')
-            transaction = None
         if transaction is not None:
             self._setup_tr_abort(transaction)
             transaction.addfilegenerator(
--- a/mercurial/encoding.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/encoding.py	Mon Feb 12 16:22:47 2024 +0100
@@ -9,8 +9,16 @@
 import locale
 import os
 import re
+import typing
 import unicodedata
 
+from typing import (
+    Any,
+    Callable,
+    Text,
+    TypeVar,
+)
+
 from . import (
     error,
     policy,
@@ -19,22 +27,7 @@
 
 from .pure import charencode as charencodepure
 
-if pycompat.TYPE_CHECKING:
-    from typing import (
-        Any,
-        Callable,
-        List,
-        Text,
-        Type,
-        TypeVar,
-        Union,
-    )
-
-    # keep pyflakes happy
-    for t in (Any, Callable, List, Text, Type, Union):
-        assert t
-
-    _Tlocalstr = TypeVar('_Tlocalstr', bound='localstr')
+_Tlocalstr = TypeVar('_Tlocalstr', bound='localstr')
 
 charencode = policy.importmod('charencode')
@@ -59,8 +52,7 @@
 assert all(i.startswith((b"\xe2", b"\xef")) for i in _ignore)
 
 
-def hfsignoreclean(s):
-    # type: (bytes) -> bytes
+def hfsignoreclean(s: bytes) -> bytes:
     """Remove codepoints ignored by HFS+ from s.
 
     >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
@@ -131,10 +123,9 @@
             s._utf8 = u
             return s
 
-    if pycompat.TYPE_CHECKING:
+    if typing.TYPE_CHECKING:
         # pseudo implementation to help pytype see localstr() constructor
-        def __init__(self, u, l):
-            # type: (bytes, bytes) -> None
+        def __init__(self, u: bytes, l: bytes) -> None:
             super(localstr, self).__init__(l)
             self._utf8 = u
 
@@ -153,8 +144,7 @@
     """
 
 
-def tolocal(s):
-    # type: (bytes) -> bytes
+def tolocal(s: bytes) -> bytes:
     """
     Convert a string from internal UTF-8 to local encoding
 
@@ -222,8 +212,7 @@
     )
 
 
-def fromlocal(s):
-    # type: (bytes) -> bytes
+def fromlocal(s: bytes) -> bytes:
     """
     Convert a string from the local character encoding to UTF-8
 
@@ -254,20 +243,17 @@
     )
 
 
-def unitolocal(u):
-    # type: (Text) -> bytes
+def unitolocal(u: str) -> bytes:
     """Convert a unicode string to a byte string of local encoding"""
     return tolocal(u.encode('utf-8'))
 
 
-def unifromlocal(s):
-    # type: (bytes) -> Text
+def unifromlocal(s: bytes) -> str:
     """Convert a byte string of local encoding to a unicode string"""
     return fromlocal(s).decode('utf-8')
 
 
-def unimethod(bytesfunc):
-    # type: (Callable[[Any], bytes]) -> Callable[[Any], Text]
+def unimethod(bytesfunc: Callable[[Any], bytes]) -> Callable[[Any], str]:
     """Create a proxy method that forwards __unicode__() and __str__() of
     Python 3 to __bytes__()"""
 
@@ -285,8 +271,7 @@
 strmethod = unimethod
 
 
-def lower(s):
-    # type: (bytes) -> bytes
+def lower(s: bytes) -> bytes:
     """best-effort encoding-aware case-folding of local string s"""
     try:
         return asciilower(s)
@@ -310,8 +295,7 @@
     )
 
 
-def upper(s):
-    # type: (bytes) -> bytes
+def upper(s: bytes) -> bytes:
     """best-effort encoding-aware case-folding of local string s"""
     try:
         return asciiupper(s)
@@ -319,8 +303,7 @@
         return upperfallback(s)
 
 
-def upperfallback(s):
-    # type: (Any) -> Any
+def upperfallback(s: Any) -> Any:
     try:
         if isinstance(s, localstr):
             u = s._utf8.decode("utf-8")
@@ -395,14 +378,12 @@
 )
 
 
-def colwidth(s):
-    # type: (bytes) -> int
+def colwidth(s: bytes) -> int:
     """Find the column width of a string for display in the local encoding"""
     return ucolwidth(s.decode(_sysstr(encoding), 'replace'))
 
 
-def ucolwidth(d):
-    # type: (Text) -> int
+def ucolwidth(d: Text) -> int:
     """Find the column width of a Unicode string for display"""
     eaw = getattr(unicodedata, 'east_asian_width', None)
     if eaw is not None:
@@ -410,8 +391,7 @@
     return len(d)
 
 
-def getcols(s, start, c):
-    # type: (bytes, int, int) -> bytes
+def getcols(s: bytes, start: int, c: int) -> bytes:
     """Use colwidth to find a c-column substring of s starting at byte
     index start"""
     for x in range(start + c, len(s)):
@@ -421,8 +401,12 @@
     raise ValueError('substring not found')
 
 
-def trim(s, width, ellipsis=b'', leftside=False):
-    # type: (bytes, int, bytes, bool) -> bytes
+def trim(
+    s: bytes,
+    width: int,
+    ellipsis: bytes = b'',
+    leftside: bool = False,
+) -> bytes:
     """Trim string 's' to at most 'width' columns (including 'ellipsis').
 
     If 'leftside' is True, left side of string 's' is trimmed.
@@ -540,8 +524,7 @@
 other = 0
 
 
-def jsonescape(s, paranoid=False):
-    # type: (Any, Any) -> Any
+def jsonescape(s: Any, paranoid: Any = False) -> Any:
     """returns a string suitable for JSON
 
     JSON is problematic for us because it doesn't support non-Unicode
@@ -601,8 +584,7 @@
 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
 
 
-def getutf8char(s, pos):
-    # type: (bytes, int) -> bytes
+def getutf8char(s: bytes, pos: int) -> bytes:
     """get the next full utf-8 character in the given string, starting at pos
 
     Raises a UnicodeError if the given location does not start a valid
@@ -620,8 +602,7 @@
     return c
 
 
-def toutf8b(s):
-    # type: (bytes) -> bytes
+def toutf8b(s: bytes) -> bytes:
     """convert a local, possibly-binary string into UTF-8b
 
     This is intended as a generic method to preserve data when working
@@ -689,8 +670,7 @@
     return bytes(r)
 
 
-def fromutf8b(s):
-    # type: (bytes) -> bytes
+def fromutf8b(s: bytes) -> bytes:
     """Given a UTF-8b string, return a local, possibly-binary string.
 
    return the original binary string. This
--- a/mercurial/error.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/error.py	Mon Feb 12 16:22:47 2024 +0100
@@ -14,23 +14,20 @@
 
 import difflib
 
+from typing import (
+    AnyStr,
+    Iterable,
+    List,
+    Optional,
+    Sequence,
+    Union,
+)
+
 # Do not import anything but pycompat here, please
 from . import pycompat
 
-if pycompat.TYPE_CHECKING:
-    from typing import (
-        Any,
-        AnyStr,
-        Iterable,
-        List,
-        Optional,
-        Sequence,
-        Union,
-    )
 
-def _tobytes(exc):
-    # type: (...) -> bytes
+def _tobytes(exc) -> bytes:
     """Byte-stringify exception in the same way as BaseException_str()"""
     if not exc.args:
         return b''
@@ -47,7 +44,7 @@
     """
 
     def __init__(self, *args, **kw):
-        self.hint = kw.pop('hint', None)  # type: Optional[bytes]
+        self.hint: Optional[bytes] = kw.pop('hint', None)
         super(Hint, self).__init__(*args, **kw)
 
 
@@ -57,8 +54,7 @@
     coarse_exit_code = None
     detailed_exit_code = None
 
-    def __init__(self, message, hint=None):
-        # type: (bytes, Optional[bytes]) -> None
+    def __init__(self, message: bytes, hint: Optional[bytes] = None) -> None:
         self.message = message
         self.hint = hint
 
         # Pass the message into the Exception constructor to help extensions
@@ -68,15 +64,13 @@
     def __bytes__(self):
         return self.message
 
-    def __str__(self):
-        # type: () -> str
+    def __str__(self) -> str:
         # the output would be unreadable if the message was translated,
         # but do not replace it with encoding.strfromlocal(), which
         # may raise another exception.
         return pycompat.sysstr(self.__bytes__())
 
-    def format(self):
-        # type: () -> bytes
+    def format(self) -> bytes:
         from .i18n import _
 
         message = _(b"abort: %s\n") % self.message
@@ -103,8 +97,7 @@
 
 
 class SidedataHashError(RevlogError):
-    def __init__(self, key, expected, got):
-        # type: (int, bytes, bytes) -> None
+    def __init__(self, key: int, expected: bytes, got: bytes) -> None:
         self.hint = None
         self.sidedatakey = key
         self.expecteddigest = expected
@@ -116,8 +109,7 @@
 
 
 class LookupError(RevlogError, KeyError):
-    def __init__(self, name, index, message):
-        # type: (bytes, bytes, bytes) -> None
+    def __init__(self, name: bytes, index: bytes, message: bytes) -> None:
         self.name = name
         self.index = index
         # this can't be called 'message' because at least some installs of
@@ -154,8 +146,7 @@
 class CommandError(Exception):
     """Exception raised on errors in parsing the command line."""
 
-    def __init__(self, command, message):
-        # type: (Optional[bytes], bytes) -> None
+    def __init__(self, command: Optional[bytes], message: bytes) -> None:
         self.command = command
         self.message = message
         super(CommandError, self).__init__()
@@ -166,8 +157,11 @@
 class UnknownCommand(Exception):
     """Exception raised if command is not in the command table."""
 
-    def __init__(self, command, all_commands=None):
-        # type: (bytes, Optional[List[bytes]]) -> None
+    def __init__(
+        self,
+        command: bytes,
+        all_commands: Optional[List[bytes]] = None,
+    ) -> None:
         self.command = command
         self.all_commands = all_commands
         super(UnknownCommand, self).__init__()
@@ -178,8 +172,7 @@
 class AmbiguousCommand(Exception):
     """Exception raised if command shortcut matches more than one command."""
 
-    def __init__(self, prefix, matches):
-        # type: (bytes, List[bytes]) -> None
+    def __init__(self, prefix: bytes, matches: List[bytes]) -> None:
         self.prefix = prefix
         self.matches = matches
         super(AmbiguousCommand, self).__init__()
@@ -190,8 +183,7 @@
 class WorkerError(Exception):
     """Exception raised when a worker process dies."""
 
-    def __init__(self, status_code):
-        # type: (int) -> None
+    def __init__(self, status_code: int) -> None:
         self.status_code = status_code
         # Pass status code to superclass just so it becomes part of __bytes__
         super(WorkerError, self).__init__(status_code)
@@ -205,8 +197,7 @@
     coarse_exit_code = 1
     detailed_exit_code = 240
 
-    def format(self):
-        # type: () -> bytes
+    def format(self) -> bytes:
         from .i18n import _
 
         message = _(b"%s\n") % self.message
@@ -218,8 +209,7 @@
 class ConflictResolutionRequired(InterventionRequired):
     """Exception raised when a continuable command required merge conflict resolution."""
 
-    def __init__(self, opname):
-        # type: (bytes) -> None
+    def __init__(self, opname: bytes) -> None:
         from .i18n import _
 
         self.opname = opname
@@ -288,13 +278,16 @@
 
     detailed_exit_code = 30
 
-    def __init__(self, message, location=None, hint=None):
-        # type: (bytes, Optional[bytes], Optional[bytes]) -> None
+    def __init__(
+        self,
+        message: bytes,
+        location: Optional[bytes] = None,
+        hint: Optional[bytes] = None,
+    ) -> None:
         super(ConfigError, self).__init__(message, hint=hint)
         self.location = location
 
-    def format(self):
-        # type: () -> bytes
+    def format(self) -> bytes:
         from .i18n import _
 
         if self.location is not None:
@@ -343,8 +336,11 @@
 class OutOfBandError(RemoteError):
     """Exception raised when a remote repo reports failure"""
 
-    def __init__(self, message=None, hint=None):
-        # type: (Optional[bytes], Optional[bytes]) -> None
+    def __init__(
+        self,
+        message: Optional[bytes] = None,
+        hint: Optional[bytes] = None,
+    ):
         from .i18n import _
 
         if message:
@@ -360,13 +356,16 @@
 
     detailed_exit_code = 10
 
-    def __init__(self, message, location=None, hint=None):
-        # type: (bytes, Optional[Union[bytes, int]], Optional[bytes]) -> None
+    def __init__(
+        self,
+        message: bytes,
+        location: Optional[Union[bytes, int]] = None,
+        hint: Optional[bytes] = None,
+    ):
         super(ParseError, self).__init__(message, hint=hint)
         self.location = location
 
-    def format(self):
-        # type: () -> bytes
+    def format(self) -> bytes:
         from .i18n import _
 
         if self.location is not None:
@@ -393,16 +392,14 @@
     __bytes__ = _tobytes
 
 
-def getsimilar(symbols, value):
-    # type: (Iterable[bytes], bytes) -> List[bytes]
+def getsimilar(symbols: Iterable[bytes], value: bytes) -> List[bytes]:
     sim = lambda x: difflib.SequenceMatcher(None, value, x).ratio()
     # The cutoff for similarity here is pretty arbitrary. It should
     # probably be investigated and tweaked.
     return [s for s in symbols if sim(s) > 0.6]
 
 
-def similarity_hint(similar):
-    # type: (List[bytes]) -> Optional[bytes]
+def similarity_hint(similar: List[bytes]) -> Optional[bytes]:
     from .i18n import _
 
     if len(similar) == 1:
@@ -417,8 +414,7 @@
 class UnknownIdentifier(ParseError):
     """Exception raised when a {rev,file}set references an unknown identifier"""
 
-    def __init__(self, function, symbols):
-        # type: (bytes, Iterable[bytes]) -> None
+    def __init__(self, function: bytes, symbols: Iterable[bytes]) -> None:
         from .i18n import _
 
         similar = getsimilar(symbols, function)
@@ -452,16 +448,14 @@
 class StdioError(IOError):
     """Raised if I/O to stdout or stderr fails"""
 
-    def __init__(self, err):
-        # type: (IOError) -> None
+    def __init__(self, err: IOError) -> None:
         IOError.__init__(self, err.errno, err.strerror)
 
     # no __bytes__() because error message is derived from the standard IOError
 
 
 class UnsupportedMergeRecords(Abort):
-    def __init__(self, recordtypes):
-        # type: (Iterable[bytes]) -> None
+    def __init__(self, recordtypes: Iterable[bytes]) -> None:
         from .i18n import _
 
         self.recordtypes = sorted(recordtypes)
@@ -479,16 +473,24 @@
 class UnknownVersion(Abort):
     """generic exception for aborting from an encounter with an unknown version"""
 
-    def __init__(self, msg, hint=None, version=None):
-        # type: (bytes, Optional[bytes], Optional[bytes]) -> None
+    def __init__(
+        self,
+        msg: bytes,
+        hint: Optional[bytes] = None,
+        version: Optional[bytes] = None,
+    ) -> None:
         self.version = version
         super(UnknownVersion, self).__init__(msg, hint=hint)
 
 
 class LockError(IOError):
-    def __init__(self, errno, strerror, filename, desc):
-        # TODO: figure out if this should be bytes or str
-        # _type: (int, str, str, bytes) -> None
+    def __init__(
+        self,
+        errno: int,
+        strerror: str,
+        filename: bytes,
+        desc: Optional[bytes],
+    ) -> None:
         IOError.__init__(self, errno, strerror, filename)
         self.desc = desc
 
@@ -496,8 +498,15 @@
 
 
 class LockHeld(LockError):
-    def __init__(self, errno, filename, desc, locker):
-        LockError.__init__(self, errno, b'Lock held', filename, desc)
+    def __init__(
+        self,
+        errno: int,
+        filename: bytes,
+        desc: Optional[bytes],
+        locker,
+    ):
+        LockError.__init__(self, errno, 'Lock held', filename, desc)
+        self.filename: bytes = filename
         self.locker = locker
 
 
@@ -534,8 +543,7 @@
 class ProgrammingError(Hint, RuntimeError):
     """Raised if a mercurial (core or extension) developer made a mistake"""
 
-    def __init__(self, msg, *args, **kwargs):
-        # type: (AnyStr, Any, Any) -> None
+    def __init__(self, msg: AnyStr, *args, **kwargs):
         # On Python 3, turn the message back into a string since this is
         # an internal-only error that won't be printed except in a
         # stack traces.
@@ -612,8 +620,7 @@
     Also contains the tombstone data substituted for the uncensored data.
     """
 
-    def __init__(self, filename, node, tombstone):
-        # type: (bytes, bytes, bytes) -> None
+    def __init__(self, filename: bytes, node: bytes, tombstone: bytes):
         from .node import short
 
         StorageError.__init__(self, b'%s:%s' % (filename, short(node)))
@@ -675,7 +682,10 @@
     The error is a formatter string and an optional iterable of arguments.
     """
 
-    def __init__(self, message, args=None):
-        # type: (bytes, Optional[Sequence[bytes]]) -> None
+    def __init__(
+        self,
+        message: bytes,
+        args: Optional[Sequence[bytes]] = None,
+    ) -> None:
         self.message = message
         self.messageargs = args
--- a/mercurial/extensions.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/extensions.py	Mon Feb 12 16:22:47 2024 +0100
@@ -625,9 +625,8 @@
     def __init__(self, container, funcname, wrapper):
         assert callable(wrapper)
         if not isinstance(funcname, str):
-            msg = b"pass wrappedfunction target name as `str`, not `bytes`"
-            util.nouideprecwarn(msg, b"6.6", stacklevel=2)
-            funcname = pycompat.sysstr(funcname)
+            msg = b"wrappedfunction target name should be `str`, not `bytes`"
+            raise TypeError(msg)
         self._container = container
         self._funcname = funcname
         self._wrapper = wrapper
@@ -675,9 +674,8 @@
 
     assert callable(wrapper)
     if not isinstance(funcname, str):
-        msg = b"pass wrapfunction target name as `str`, not `bytes`"
-        util.nouideprecwarn(msg, b"6.6", stacklevel=2)
-        funcname = pycompat.sysstr(funcname)
+        msg = b"wrapfunction target name should be `str`, not `bytes`"
+        raise TypeError(msg)
 
     origfn = getattr(container, funcname)
     assert callable(origfn)
--- a/mercurial/helptext/config.txt	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/helptext/config.txt	Mon Feb 12 16:22:47 2024 +0100
@@ -2994,6 +2994,64 @@
     Increase the amount of output printed.
     (default: False)
 
+``usage``
+---------
+
+``repository-role``
+    What this repository is used for.
+
+    This is used to adjust behavior and performance to best fit the repository purpose.
+
+    Currently recognised values are:
+    - default: an all purpose repository
+
+``resources``
+    How aggressive Mercurial can be with resource usage:
+
+    Currently recognised values are:
+    - default: the default value currently is equivalent to medium,
+    - high: allows for higher cpu, memory and disk-space usage to improve
+      performance of some operations.
+    - medium: aims at a moderate resource usage,
+    - low: reduces resources usage when possible, decreasing overall
+      performance.
+
+    For finer configuration, see also `usage.resources.cpu`,
+    `usage.resources.disk` and `usage.resources.memory`.
+
+``resources.cpu``
+    How aggressive Mercurial can be in terms of cpu usage:
+
+    Currently recognised values are:
+    - default: the default value, inherits the value from `usage.resources`,
+    - high: allows for more aggressive cpu usage, improving storage quality and
+      the performance of some operations at the expense of machine load
+    - medium: aims at a moderate cpu usage,
+    - low: reduces cpu usage when possible, potentially at the expense of
+      slower operations, increased storage and exchange payload.
+
+``resources.disk``
+    How aggressive Mercurial can be in terms of disk usage:
+
+    Currently recognised values are:
+    - default: the default value, inherits the value from `usage.resources`,
+    - high: allows for more disk space usage where it can improve performance,
+    - medium: aims at a moderate disk usage,
+    - low: reduces disk usage when possible, decreasing performance in some
+      occasion.
+
+``resources.memory``
+    How aggressive Mercurial can be in terms of memory usage:
+
+    Currently recognised values are:
+    - default: the default value, inherits the value from `usage.resources`,
+    - high: allows for more aggressive memory usage to improve overall
+      performance,
+    - medium: aims at a moderate memory usage,
+    - low: reduces memory usage when possible at the cost of overall
+      performance.
+
+
 ``command-templates``
 ---------------------
--- a/mercurial/hgweb/hgwebdir_mod.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/hgweb/hgwebdir_mod.py	Mon Feb 12 16:22:47 2024 +0100
@@ -410,15 +410,15 @@
             gc.collect(generation=1)
 
     def _runwsgi(self, req, res):
-        try:
-            self.refresh()
+        self.refresh()
 
-            csp, nonce = cspvalues(self.ui)
-            if csp:
-                res.headers[b'Content-Security-Policy'] = csp
+        csp, nonce = cspvalues(self.ui)
+        if csp:
+            res.headers[b'Content-Security-Policy'] = csp
 
-            virtual = req.dispatchpath.strip(b'/')
-            tmpl = self.templater(req, nonce)
+        virtual = req.dispatchpath.strip(b'/')
+        tmpl = self.templater(req, nonce)
+
+        try:
             ctype = tmpl.render(b'mimetype', {b'encoding': encoding.encoding})
 
             # Global defaults. These can be overridden by any handler.
--- a/mercurial/hgweb/webcommands.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/hgweb/webcommands.py	Mon Feb 12 16:22:47 2024 +0100
@@ -516,8 +516,7 @@
 rev = webcommand(b'rev')(changeset)
 
 
-def decodepath(path):
-    # type: (bytes) -> bytes
+def decodepath(path: bytes) -> bytes:
     """Hook for mapping a path in the repository to a path in the
     working copy.
--- a/mercurial/i18n.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/i18n.py	Mon Feb 12 16:22:47 2024 +0100
@@ -11,19 +11,16 @@
 import os
 import sys
 
+from typing import (
+    List,
+)
+
 from .utils import resourceutil
 from . import (
     encoding,
     pycompat,
 )
 
-if pycompat.TYPE_CHECKING:
-    from typing import (
-        Callable,
-        List,
-    )
-
-
 # modelled after templater.templatepath:
 if getattr(sys, 'frozen', None) is not None:
     module = pycompat.sysexecutable
@@ -67,8 +64,7 @@
 _msgcache = {}  # encoding: {message: translation}
 
 
-def gettext(message):
-    # type: (bytes) -> bytes
+def gettext(message: bytes) -> bytes:
     """Translate message.
 
     The message is looked up in the catalog to get a Unicode string,
@@ -86,7 +82,7 @@
     if message not in cache:
         if type(message) is str:
             # goofy unicode docstrings in test
-            paragraphs = message.split(u'\n\n')  # type: List[str]
+            paragraphs: List[str] = message.split(u'\n\n')
         else:
             # should be ascii, but we have unicode docstrings in test, which
             # are converted to utf-8 bytes on Python 3.
@@ -119,6 +115,10 @@
 
 
 if _plain():
-    _ = lambda message: message  # type: Callable[[bytes], bytes]
+
+    def _(message: bytes) -> bytes:
+        return message
+
+
 else:
     _ = gettext
--- a/mercurial/interfaces/dirstate.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/interfaces/dirstate.py	Mon Feb 12 16:22:47 2024 +0100
@@ -123,7 +123,7 @@
         See localrepo.setparents()
         """
 
-    def setbranch(branch, transaction=None):
+    def setbranch(branch, transaction):
         pass
 
     def invalidate():
--- a/mercurial/linelog.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/linelog.py	Mon Feb 12 16:22:47 2024 +0100
@@ -45,7 +45,7 @@
 @attr.s
 class annotateresult:
     rev = attr.ib()
-    lines = attr.ib()
+    lines = attr.ib(type=bytearray)
     _eof = attr.ib()
 
     def __iter__(self):
--- a/mercurial/localrepo.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/localrepo.py	Mon Feb 12 16:22:47 2024 +0100
@@ -369,7 +369,7 @@
         common=None,
         bundlecaps=None,
         remote_sidedata=None,
-        **kwargs
+        **kwargs,
     ):
         chunks = exchange.getbundlechunks(
             self._repo,
@@ -378,7 +378,7 @@
             common=common,
             bundlecaps=bundlecaps,
             remote_sidedata=remote_sidedata,
-            **kwargs
+            **kwargs,
         )[1]
 
         cb = util.chunkbuffer(chunks)
@@ -1089,15 +1089,12 @@
     if chunkcachesize is not None:
         data_config.chunk_cache_size = chunkcachesize
 
-    if ui.configbool(b'experimental', b'revlog.uncompressed-cache.enabled'):
-        factor = ui.configint(
-            b'experimental', b'revlog.uncompressed-cache.factor'
-        )
-        count = ui.configint(
-            b'experimental', b'revlog.uncompressed-cache.count'
-        )
-        data_config.uncompressed_cache_factor = factor
-        data_config.uncompressed_cache_count = count
+    memory_profile = scmutil.get_resource_profile(ui, b'memory')
+    if memory_profile >= scmutil.RESOURCE_MEDIUM:
+        data_config.uncompressed_cache_count = 10_000
+        data_config.uncompressed_cache_factor = 4
+        if memory_profile >= scmutil.RESOURCE_HIGH:
+            data_config.uncompressed_cache_factor = 10
 
     delta_config.delta_both_parents = ui.configbool(
         b'storage', b'revlog.optimize-delta-parent-choice'
@@ -2401,7 +2398,7 @@
         data: bytes,
         flags: bytes,
         backgroundclose=False,
-        **kwargs
+        **kwargs,
     ) -> int:
         """write ``data`` into ``filename`` in the working directory
 
@@ -2584,7 +2581,7 @@
             repo.hook(
                 b'pretxnclose-bookmark',
                 throw=True,
-                **pycompat.strkwargs(args)
+                **pycompat.strkwargs(args),
             )
         if hook.hashook(repo.ui, b'pretxnclose-phase'):
             cl = repo.unfiltered().changelog
@@ -2596,7 +2593,7 @@
                 repo.hook(
                     b'pretxnclose-phase',
                     throw=True,
-                    **pycompat.strkwargs(args)
+                    **pycompat.strkwargs(args),
                 )
 
         repo.hook(
@@ -2671,7 +2668,7 @@
                     repo.hook(
                         b'txnclose-bookmark',
                         throw=False,
-                        **pycompat.strkwargs(args)
+                        **pycompat.strkwargs(args),
                     )
 
             if hook.hashook(repo.ui, b'txnclose-phase'):
@@ -2687,7 +2684,7 @@
                         repo.hook(
                             b'txnclose-phase',
                             throw=False,
-                            **pycompat.strkwargs(args)
+                            **pycompat.strkwargs(args),
                         )
 
             repo.hook(
@@ -2921,17 +2918,7 @@
 
         unfi = self.unfiltered()
 
-        if full:
-            msg = (
-                "`full` argument for `repo.updatecaches` is deprecated\n"
-                "(use `caches=repository.CACHE_ALL` instead)"
-            )
-            self.ui.deprecwarn(msg, b"5.9")
-            caches = repository.CACHES_ALL
-            if full == b"post-clone":
-                caches = repository.CACHES_POST_CLONE
-            caches = repository.CACHES_ALL
-        elif caches is None:
+        if caches is None:
             caches = repository.CACHES_DEFAULT
 
         if repository.CACHE_BRANCHMAP_SERVED in caches:
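
The removed `experimental.revlog.uncompressed-cache.*` knobs are thus superseded by the memory profile: per the hunk above, `medium` (the current default) enables the cache with factor 4 and `high` raises the factor to 10. A hypothetical check of the active setting:

    # prints the configured profile, if one has been set explicitly
    hg config usage.resources.memory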
--- a/mercurial/lock.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/lock.py	Mon Feb 12 16:22:47 2024 +0100
@@ -12,6 +12,7 @@
 import signal
 import socket
 import time
+import typing
 import warnings
 
 from .i18n import _
@@ -154,8 +155,12 @@
                     if delay == warningidx:
                         printwarning(ui.warn, inst.locker)
                     if timeout <= delay:
+                        assert isinstance(inst.filename, bytes)
                         raise error.LockHeld(
-                            errno.ETIMEDOUT, inst.filename, l.desc, inst.locker
+                            errno.ETIMEDOUT,
+                            typing.cast(bytes, inst.filename),
+                            l.desc,
+                            inst.locker,
                         )
                     time.sleep(1)
                     delay += 1
@@ -290,8 +295,13 @@
                         locker,
                     )
                 else:
+                    assert isinstance(why.filename, bytes)
+                    assert isinstance(why.strerror, str)
                     raise error.LockUnavailable(
-                        why.errno, why.strerror, why.filename, self.desc
+                        why.errno,
+                        why.strerror,
+                        typing.cast(bytes, why.filename),
+                        self.desc,
                     )
 
             if not self.held:
--- a/mercurial/logcmdutil.py	Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/logcmdutil.py	Mon Feb 12 16:22:47 2024 +0100
@@ -10,19 +10,28 @@
 import os
 import posixpath
 
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Optional,
+    Sequence,
+    Tuple,
+)
+
 from .i18n import _
-from .node import nullrev, wdirrev
+from .node import wdirrev
 
 from .thirdparty import attr
 
 from . import (
     dagop,
+    diffutil,
     error,
     formatter,
     graphmod,
     match as matchmod,
     mdiff,
-    merge,
     patch,
     pathutil,
     pycompat,
@@ -40,20 +49,6 @@
 )
 
 
-if pycompat.TYPE_CHECKING:
-    from typing import (
-        Any,
-        Callable,
-        Dict,
-        Optional,
-        Sequence,
-        Tuple,
-    )
-
-    for t in (Any, Callable, Dict, Optional, Tuple):
-        assert t
-
-
 def getlimit(opts):
     """get the log limit according to option -l/--limit"""
     limit = opts.get(b'limit')
@@ -69,36 +64,7 @@
     return limit
 
 
-def diff_parent(ctx):
-    """get the context object to use as parent when diffing
-
-
-    If diff.merge is enabled, an overlayworkingctx of the auto-merged parents will be returned.
-    """
-    repo = ctx.repo()
-    if repo.ui.configbool(b"diff", b"merge") and ctx.p2().rev() != nullrev:
-        # avoid cycle context -> subrepo -> cmdutil -> logcmdutil
-        from . import context
-
-        wctx = context.overlayworkingctx(repo)
-        wctx.setbase(ctx.p1())
-        with repo.ui.configoverride(
-            {
-                (
-                    b"ui",
-                    b"forcemerge",
-                ): b"internal:merge3-lie-about-conflicts",
-            },
-            b"merge-diff",
-        ):
-            with repo.ui.silent():
-                merge.merge(ctx.p2(), wc=wctx)
-        return wctx
-    else:
-        return ctx.p1()
-
-
-def diffordiffstat(
+def get_diff_chunks(
     ui,
     repo,
     diffopts,
@@ -107,14 +73,10 @@
     match,
     changes=None,
     stat=False,
-    fp=None,
-    graphwidth=0,
     prefix=b'',
     root=b'',
-    listsubrepos=False,
     hunksfilterfn=None,
 ):
-    '''show diff or diffstat.'''
     if root:
         relroot = pathutil.canonpath(repo.root, repo.getcwd(), root)
     else:
@@ -159,14 +121,11 @@
     if stat:
         diffopts = diffopts.copy(context=0, noprefix=False)
-        width = 80
-        if not ui.plain():
-            width = ui.termwidth() - graphwidth
 
     # If an explicit --root was given, don't respect ui.relative-paths
     if not relroot:
         pathfn = compose(scmutil.getuipathfn(repo), pathfn)
 
-    chunks = ctx2.diff(
+    return ctx2.diff(
         ctx1,
         match,
         changes,
@@ -176,6 +135,45 @@
         hunksfilterfn=hunksfilterfn,
     )
 
+
+def diffordiffstat(
+    ui,
+    repo,
+    diffopts,
+    ctx1,
+    ctx2,
+    match,
+    changes=None,
+    stat=False,
+    fp=None,
+    graphwidth=0,
+    prefix=b'',
+    root=b'',
+    listsubrepos=False,
+    hunksfilterfn=None,
+):
+    '''show diff or diffstat.'''
+
+    chunks = get_diff_chunks(
+        ui,
+        repo,
+        diffopts,
+        ctx1,
+        ctx2,
+        match,
+        changes=changes,
+        stat=stat,
+        prefix=prefix,
+        root=root,
+        hunksfilterfn=hunksfilterfn,
+    )
+
+    if stat:
+        diffopts = diffopts.copy(context=0, noprefix=False)
+        width = 80
+        if not ui.plain():
+            width = ui.termwidth() - graphwidth
+
     if fp is not None or ui.canwritewithoutlabels():
         out = fp or ui
         if stat:
@@ -241,7 +239,7 @@
             ui,
             ctx.repo(),
             diffopts,
-            diff_parent(ctx),
+            diffutil.diff_parent(ctx),
             ctx,
             match=self._makefilematcher(ctx),
             stat=stat,
@@ -249,6 +247,33 @@
             hunksfilterfn=self._makehunksfilter(ctx),
         )
 
+    def getdiffstats(self, ui, ctx, diffopts, stat=False):
+        chunks = get_diff_chunks(
+            ui,
+            ctx.repo(),
+            diffopts,
+            diffutil.diff_parent(ctx),
+            ctx,
+            match=self._makefilematcher(ctx),
+            stat=stat,
+            hunksfilterfn=self._makehunksfilter(ctx),
+        )
+
+        diffdata = []
+        for filename, additions, removals, binary in patch.diffstatdata(
+            util.iterlines(chunks)
+        ):
+            diffdata.append(
+                {
+                    b"name": filename,
+                    b"additions": additions,
+                    b"removals": removals,
+                    b"binary": binary,
+                }
+            )
+
+        return diffdata
+
 
 def changesetlabels(ctx):
     labels = [b'log.changeset', b'changeset.%s' % ctx.phasestr()]
@@ -525,9 +550,10 @@
         )
 
         if self._includestat or b'diffstat' in datahint:
-            self.ui.pushbuffer()
-            self._differ.showdiff(self.ui, ctx, self._diffopts, stat=True)
-            fm.data(diffstat=self.ui.popbuffer())
+            data = self._differ.getdiffstats(
+                self.ui, ctx, self._diffopts, stat=True
+            )
+            fm.data(diffstat=fm.formatlist(data, name=b'diffstat'))
         if self._includediff or b'diff' in datahint:
             self.ui.pushbuffer()
             self._differ.showdiff(self.ui, ctx, self._diffopts, stat=False)
@@ -749,8 +775,11 @@
     limit = attr.ib(default=None)
 
 
-def parseopts(ui, pats, opts):
-    # type: (Any, Sequence[bytes], Dict[bytes, Any]) -> walkopts
+def parseopts(
+    ui: Any,
+    pats: Sequence[bytes],
+    opts: Dict[bytes, Any],
+) -> walkopts:
     """Parse log command options into walkopts
 
     The returned walkopts will be passed in to getrevs() or makewalker().
@@ -1040,8 +1069,12 @@
     return revs
 
 
-def makewalker(repo, wopts):
-    # type: (Any, walkopts) -> Tuple[smartset.abstractsmartset, Optional[Callable[[Any], matchmod.basematcher]]]
+def makewalker(
+    repo: Any,
+    wopts: walkopts,
+) -> Tuple[
+    smartset.abstractsmartset, Optional[Callable[[Any], matchmod.basematcher]]
+]:
    """Build (revs, makefilematcher) to scan revision/file history
 
     - revs is the smartset to be traversed.
@@ -1091,8 +1124,10 @@
     return revs, filematcher
 
 
-def getrevs(repo, wopts):
-    # type: (Any, walkopts) -> Tuple[smartset.abstractsmartset, Optional[changesetdiffer]]
+def getrevs(
+    repo: Any,
+    wopts: walkopts,
+) -> Tuple[smartset.abstractsmartset, Optional[changesetdiffer]]:
     """Return (revs, differ) where revs is a smartset
 
     differ is a changesetdiffer with pre-configured file matcher.
--- a/mercurial/mail.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/mail.py Mon Feb 12 16:22:47 2024 +0100 @@ -18,6 +18,14 @@ import socket import time +from typing import ( + Any, + List, + Optional, + Tuple, + Union, +) + from .i18n import _ from .pycompat import ( open, @@ -35,12 +43,6 @@ urlutil, ) -if pycompat.TYPE_CHECKING: - from typing import Any, List, Tuple, Union - - # keep pyflakes happy - assert all((Any, List, Tuple, Union)) - class STARTTLS(smtplib.SMTP): """Derived class to verify the peer certificate for STARTTLS. @@ -103,8 +105,7 @@ return new_socket -def _pyhastls(): - # type: () -> bool +def _pyhastls() -> bool: """Returns true iff Python has TLS support, false otherwise.""" try: import ssl @@ -267,8 +268,7 @@ ) -def codec2iana(cs): - # type: (str) -> str +def codec2iana(cs: str) -> str: ''' ''' cs = email.charset.Charset(cs).input_charset.lower() @@ -278,8 +278,11 @@ return cs -def mimetextpatch(s, subtype='plain', display=False): - # type: (bytes, str, bool) -> email.message.Message +def mimetextpatch( + s: bytes, + subtype: str = 'plain', + display: bool = False, +) -> email.message.Message: """Return MIME message suitable for a patch. Charset will be detected by first trying to decode as us-ascii, then utf-8, and finally the global encodings. If all those fail, fall back to @@ -304,8 +307,9 @@ return mimetextqp(s, subtype, "iso-8859-1") -def mimetextqp(body, subtype, charset): - # type: (bytes, str, str) -> email.message.Message +def mimetextqp( + body: bytes, subtype: str, charset: str +) -> email.message.Message: """Return MIME message. Quoted-printable transfer encoding will be used if necessary. """ @@ -330,8 +334,7 @@ return msg -def _charsets(ui): - # type: (Any) -> List[str] +def _charsets(ui: Any) -> List[str]: '''Obtains charsets to send mail parts not containing patches.''' charsets = [ pycompat.sysstr(cs.lower()) @@ -348,8 +351,7 @@ return [cs for cs in charsets if not cs.endswith('ascii')] -def _encode(ui, s, charsets): - # type: (Any, bytes, List[str]) -> Tuple[bytes, str] +def _encode(ui: Any, s: bytes, charsets: List[str]) -> Tuple[bytes, str]: """Returns (converted) string, charset tuple. Finds out best charset by cycling through sendcharsets in descending order. Tries both encoding and fallbackencoding for input. Only as @@ -399,8 +401,12 @@ return s, 'us-ascii' -def headencode(ui, s, charsets=None, display=False): - # type: (Any, Union[bytes, str], List[str], bool) -> str +def headencode( + ui: Any, + s: Union[bytes, str], + charsets: Optional[List[str]] = None, + display: bool = False, +) -> str: '''Returns RFC-2047 compliant header from given string.''' if not display: # split into words? 
@@ -409,8 +415,9 @@ return encoding.strfromlocal(s) -def _addressencode(ui, name, addr, charsets=None): - # type: (Any, str, str, List[str]) -> str +def _addressencode( + ui: Any, name: str, addr: str, charsets: Optional[List[str]] = None +) -> str: addr = encoding.strtolocal(addr) name = headencode(ui, name, charsets) try: @@ -429,8 +436,12 @@ return email.utils.formataddr((name, encoding.strfromlocal(addr))) -def addressencode(ui, address, charsets=None, display=False): - # type: (Any, bytes, List[str], bool) -> str +def addressencode( + ui: Any, + address: bytes, + charsets: Optional[List[str]] = None, + display: bool = False, +) -> str: '''Turns address into RFC-2047 compliant header.''' if display or not address: return encoding.strfromlocal(address or b'') @@ -438,8 +449,12 @@ return _addressencode(ui, name, addr, charsets) -def addrlistencode(ui, addrs, charsets=None, display=False): - # type: (Any, List[bytes], List[str], bool) -> List[str] +def addrlistencode( + ui: Any, + addrs: List[bytes], + charsets: Optional[List[str]] = None, + display: bool = False, +) -> List[str]: """Turns a list of addresses into a list of RFC-2047 compliant headers. A single element of input list may contain multiple addresses, but output always has one address per item""" @@ -458,8 +473,12 @@ return result -def mimeencode(ui, s, charsets=None, display=False): - # type: (Any, bytes, List[str], bool) -> email.message.Message +def mimeencode( + ui: Any, + s: bytes, + charsets: Optional[List[str]] = None, + display: bool = False, +) -> email.message.Message: """creates mime text object, encodes it if needed, and sets charset and transfer-encoding accordingly.""" cs = 'us-ascii' @@ -471,8 +490,7 @@ Generator = email.generator.BytesGenerator -def parse(fp): - # type: (Any) -> email.message.Message +def parse(fp: Any) -> email.message.Message: ep = email.parser.Parser() # disable the "universal newlines" mode, which isn't binary safe. # I have no idea if ascii/surrogateescape is correct, but that's @@ -486,14 +504,12 @@ fp.detach() -def parsebytes(data): - # type: (bytes) -> email.message.Message +def parsebytes(data: bytes) -> email.message.Message: ep = email.parser.BytesParser() return ep.parsebytes(data) -def headdecode(s): - # type: (Union[email.header.Header, bytes]) -> bytes +def headdecode(s: Union[email.header.Header, bytes]) -> bytes: '''Decodes RFC-2047 header''' uparts = [] for part, charset in email.header.decode_header(s):
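The mail.py hunks above, like the pathutil.py and phases.py ones that follow, apply one mechanical migration: PEP 484 type comments become inline annotations that pytype reads natively. The shape of the change, shown on a hypothetical helper modeled loosely on headencode:

    from typing import Any, List, Optional

    # before: Python-2-era type comment, invisible at runtime
    def encode_header(ui, s, charsets=None):
        # type: (Any, bytes, Optional[List[str]]) -> str
        raise NotImplementedError

    # after: the equivalent inline annotations
    def encode_header_annotated(
        ui: Any,
        s: bytes,
        charsets: Optional[List[str]] = None,
    ) -> str:
        raise NotImplementedError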
--- a/mercurial/pathutil.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/pathutil.py Mon Feb 12 16:22:47 2024 +0100 @@ -24,8 +24,7 @@ parsers = policy.importmod('parsers') -def _lowerclean(s): - # type: (bytes) -> bytes +def _lowerclean(s: bytes) -> bytes: return encoding.hfsignoreclean(s.lower()) @@ -64,8 +63,7 @@ else: self.normcase = lambda x: x - def __call__(self, path, mode=None): - # type: (bytes, Optional[Any]) -> None + def __call__(self, path: bytes, mode: Optional[Any] = None) -> None: """Check the relative path. path may contain a pattern (e.g. foodir/**.txt)""" @@ -162,8 +160,7 @@ raise error.Abort(msg % (path, pycompat.bytestr(prefix))) return True - def check(self, path): - # type: (bytes) -> bool + def check(self, path: bytes) -> bool: try: self(path) return True @@ -184,8 +181,12 @@ self._cached = False -def canonpath(root, cwd, myname, auditor=None): - # type: (bytes, bytes, bytes, Optional[pathauditor]) -> bytes +def canonpath( + root: bytes, + cwd: bytes, + myname: bytes, + auditor: Optional[pathauditor] = None, +) -> bytes: """return the canonical path of myname, given cwd and root >>> def check(root, cwd, myname): @@ -287,8 +288,7 @@ ) -def normasprefix(path): - # type: (bytes) -> bytes +def normasprefix(path: bytes) -> bytes: """normalize the specified path as path prefix Returned value can be used safely for "p.startswith(prefix)", @@ -311,8 +311,7 @@ return path -def finddirs(path): - # type: (bytes) -> Iterator[bytes] +def finddirs(path: bytes) -> Iterator[bytes]: pos = path.rfind(b'/') while pos != -1: yield path[:pos] @@ -347,8 +346,7 @@ for f in map: addpath(f) - def addpath(self, path): - # type: (bytes) -> None + def addpath(self, path: bytes) -> None: dirs = self._dirs for base in finddirs(path): if base.endswith(b'/'): @@ -360,8 +358,7 @@ return dirs[base] = 1 - def delpath(self, path): - # type: (bytes) -> None + def delpath(self, path: bytes) -> None: dirs = self._dirs for base in finddirs(path): if dirs[base] > 1: @@ -372,8 +369,7 @@ def __iter__(self): return iter(self._dirs) - def __contains__(self, d): - # type: (bytes) -> bool + def __contains__(self, d: bytes) -> bool: return d in self._dirs @@ -388,4 +384,4 @@ # rather not let our internals know that we're thinking in posix terms # - instead we'll let them be oblivious. join = posixpath.join -dirname = posixpath.dirname # type: Callable[[bytes], bytes] +dirname: Callable[[bytes], bytes] = posixpath.dirname
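finddirs above yields every proper directory prefix of a slash-separated bytes path, deepest first. A self-contained copy of that loop with a quick check; the terminating yield of b'' is not visible in the excerpt, so treat that detail as an assumption:

    from typing import Iterator

    def finddirs(path: bytes) -> Iterator[bytes]:
        pos = path.rfind(b'/')
        while pos != -1:
            yield path[:pos]
            pos = path.rfind(b'/', 0, pos)
        yield b''  # assumed final yield: everything sits under the root

    assert list(finddirs(b'a/b/c.txt')) == [b'a/b', b'a', b'']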
--- a/mercurial/phases.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/phases.py Mon Feb 12 16:22:47 2024 +0100 @@ -102,6 +102,18 @@ import struct +import typing + +from typing import ( + Any, + Callable, + Dict, + Iterable, + List, + Optional, + Set, + Tuple, +) from .i18n import _ from .node import ( @@ -120,23 +132,17 @@ util, ) -if pycompat.TYPE_CHECKING: - from typing import ( - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Set, - Tuple, - ) +Phaseroots = Dict[int, Set[bytes]] + +if typing.TYPE_CHECKING: from . import ( localrepo, ui as uimod, ) - Phaseroots = Dict[int, Set[bytes]] + # keeps pyflakes happy + assert [uimod] + Phasedefaults = List[ Callable[[localrepo.localrepository, Phaseroots], Phaseroots] ] @@ -145,7 +151,9 @@ _fphasesentry = struct.Struct(b'>i20s') # record phase index -public, draft, secret = range(3) # type: int +public: int = 0 +draft: int = 1 +secret: int = 2 archived = 32 # non-continuous for compatibility internal = 96 # non-continuous for compatibility allphases = (public, draft, secret, archived, internal) @@ -174,20 +182,20 @@ no_bundle_phases = all_internal_phases -def supportinternal(repo): - # type: (localrepo.localrepository) -> bool +def supportinternal(repo: "localrepo.localrepository") -> bool: """True if the internal phase can be used on a repository""" return requirements.INTERNAL_PHASE_REQUIREMENT in repo.requirements -def supportarchived(repo): - # type: (localrepo.localrepository) -> bool +def supportarchived(repo: "localrepo.localrepository") -> bool: """True if the archived phase can be used on a repository""" return requirements.ARCHIVED_PHASE_REQUIREMENT in repo.requirements -def _readroots(repo, phasedefaults=None): - # type: (localrepo.localrepository, Optional[Phasedefaults]) -> Tuple[Phaseroots, bool] +def _readroots( + repo: "localrepo.localrepository", + phasedefaults: Optional["Phasedefaults"] = None, +) -> Tuple[Phaseroots, bool]: """Read phase roots from disk phasedefaults is a list of fn(repo, roots) callable, which are @@ -217,8 +225,7 @@ return roots, dirty -def binaryencode(phasemapping): - # type: (Dict[int, List[bytes]]) -> bytes +def binaryencode(phasemapping: Dict[int, List[bytes]]) -> bytes: """encode a 'phase -> nodes' mapping into a binary stream The revision lists are encoded as (phase, root) pairs. @@ -230,8 +237,7 @@ return b''.join(binarydata) -def binarydecode(stream): - # type: (...) 
-> Dict[int, List[bytes]] +def binarydecode(stream) -> Dict[int, List[bytes]]: """decode a binary stream into a 'phase -> nodes' mapping The (phase, root) pairs are turned back into a dictionary with @@ -349,8 +355,12 @@ class phasecache: - def __init__(self, repo, phasedefaults, _load=True): - # type: (localrepo.localrepository, Optional[Phasedefaults], bool) -> None + def __init__( + self, + repo: "localrepo.localrepository", + phasedefaults: Optional["Phasedefaults"], + _load: bool = True, + ): if _load: # Cheap trick to allow shallow-copy without copy module self.phaseroots, self.dirty = _readroots(repo, phasedefaults) @@ -359,8 +369,7 @@ self.filterunknown(repo) self.opener = repo.svfs - def hasnonpublicphases(self, repo): - # type: (localrepo.localrepository) -> bool + def hasnonpublicphases(self, repo: "localrepo.localrepository") -> bool: """detect if there are revisions with non-public phase""" repo = repo.unfiltered() cl = repo.changelog @@ -371,8 +380,9 @@ revs for phase, revs in self.phaseroots.items() if phase != public ) - def nonpublicphaseroots(self, repo): - # type: (localrepo.localrepository) -> Set[bytes] + def nonpublicphaseroots( + self, repo: "localrepo.localrepository" + ) -> Set[bytes]: """returns the roots of all non-public phases The roots are not minimized, so if the secret revisions are @@ -391,8 +401,12 @@ ] ) - def getrevset(self, repo, phases, subset=None): - # type: (localrepo.localrepository, Iterable[int], Optional[Any]) -> Any + def getrevset( + self, + repo: "localrepo.localrepository", + phases: Iterable[int], + subset: Optional[Any] = None, + ) -> Any: # TODO: finish typing this """return a smartset for the given phases""" self.loadphaserevs(repo) # ensure phase's sets are loaded @@ -488,8 +502,7 @@ self._phasesets[phase] = ps self._loadedrevslen = len(cl) - def loadphaserevs(self, repo): - # type: (localrepo.localrepository) -> None + def loadphaserevs(self, repo: "localrepo.localrepository") -> None: """ensure phase information is loaded in the object""" if self._phasesets is None: try: @@ -502,8 +515,7 @@ self._loadedrevslen = 0 self._phasesets = None - def phase(self, repo, rev): - # type: (localrepo.localrepository, int) -> int + def phase(self, repo: "localrepo.localrepository", rev: int) -> int: # We need a repo argument here to be able to build _phasesets # if necessary. The repository instance is not stored in # phasecache to avoid reference cycles. The changelog instance @@ -690,8 +702,7 @@ return True return False - def filterunknown(self, repo): - # type: (localrepo.localrepository) -> None + def filterunknown(self, repo: "localrepo.localrepository") -> None: """remove unknown nodes from the phase boundary Nothing is lost as unknown nodes only hold data for their descendants. @@ -768,8 +779,7 @@ repo._phasecache.replace(phcache) -def listphases(repo): - # type: (localrepo.localrepository) -> Dict[bytes, bytes] +def listphases(repo: "localrepo.localrepository") -> Dict[bytes, bytes]: """List phases root for serialization over pushkey""" # Use ordered dictionary so behavior is deterministic. 
keys = util.sortdict() @@ -800,8 +810,12 @@ return keys -def pushphase(repo, nhex, oldphasestr, newphasestr): - # type: (localrepo.localrepository, bytes, bytes, bytes) -> bool +def pushphase( + repo: "localrepo.localrepository", + nhex: bytes, + oldphasestr: bytes, + newphasestr: bytes, +) -> bool: """Update the phase of a node (pushkey handler)""" repo = repo.unfiltered() with repo.lock(): @@ -948,8 +962,7 @@ return pycompat.maplist(cl.node, sorted(new_heads)) -def newcommitphase(ui): - # type: (uimod.ui) -> int +def newcommitphase(ui: "uimod.ui") -> int: """helper to get the target phase of new commit Handle all possible values for the phases.new-commit options. @@ -964,14 +977,16 @@ ) -def hassecret(repo): - # type: (localrepo.localrepository) -> bool +def hassecret(repo: "localrepo.localrepository") -> bool: """utility function that checks if a repo has any secret changeset.""" return bool(repo._phasecache.phaseroots[secret]) -def preparehookargs(node, old, new): - # type: (bytes, Optional[int], Optional[int]) -> Dict[bytes, bytes] +def preparehookargs( + node: bytes, + old: Optional[int], + new: Optional[int], +) -> Dict[bytes, bytes]: if old is None: old = b'' else:
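binaryencode and binarydecode above move (phase, root) pairs through the fixed-size _fphasesentry record: a 4-byte big-endian phase index followed by a 20-byte binary node. A standalone round-trip sketch of that record format (a reimplementation for illustration, not the Mercurial functions themselves):

    import struct

    _entry = struct.Struct(b'>i20s')  # 4-byte phase index + 20-byte binary node

    def encode(phasemapping):
        out = []
        for phase, nodes in sorted(phasemapping.items()):
            for node in nodes:
                out.append(_entry.pack(phase, node))
        return b''.join(out)

    def decode(data):
        # phase values taken from the constants above (allphases)
        mapping = {phase: [] for phase in (0, 1, 2, 32, 96)}
        for offset in range(0, len(data), _entry.size):
            phase, node = _entry.unpack_from(data, offset)
            mapping[phase].append(node)
        return mapping

    root = b'\x11' * 20
    assert decode(encode({1: [root]}))[1] == [root]  # draft == 1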
--- a/mercurial/posix.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/posix.py Mon Feb 12 16:22:47 2024 +0100 @@ -70,13 +70,6 @@ removedirs = os.removedirs if typing.TYPE_CHECKING: - # Replace the various overloads that come along with aliasing stdlib methods - # with the narrow definition that we care about in the type checking phase - # only. This ensures that both Windows and POSIX see only the definition - # that is actually available. - # - # Note that if we check pycompat.TYPE_CHECKING here, it is always False, and - # the methods aren't replaced. def normpath(path: bytes) -> bytes: raise NotImplementedError
--- a/mercurial/pvec.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/pvec.py Mon Feb 12 16:22:47 2024 +0100 @@ -72,8 +72,7 @@ return v -def _str(v, l): - # type: (int, int) -> bytes +def _str(v: int, l: int) -> bytes: bs = b"" for p in range(l): bs = pycompat.bytechr(v & 255) + bs
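_str above packs an integer into exactly l big-endian bytes, peeling off one byte per iteration. A standalone equivalent checked against int.to_bytes; the v >>= 8 step is cut off by the hunk, so it is assumed here:

    def _str(v: int, l: int) -> bytes:
        bs = b""
        for _ in range(l):
            bs = bytes([v & 255]) + bs  # pycompat.bytechr equivalent
            v >>= 8                     # assumed next statement, truncated above
        return bs

    assert _str(0x0102, 2) == b'\x01\x02' == (0x0102).to_bytes(2, 'big')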
--- a/mercurial/pycompat.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/pycompat.py Mon Feb 12 16:22:47 2024 +0100 @@ -12,7 +12,6 @@ import builtins import codecs import concurrent.futures as futures -import functools import getopt import http.client as httplib import http.cookiejar as cookielib @@ -358,26 +357,11 @@ return sysbytes(doc) -def _wrapattrfunc(f): - @functools.wraps(f) - def w(object, name, *args): - if isinstance(name, bytes): - from . import util - - msg = b'function "%s" take `str` as argument, not `bytes`' - fname = f.__name__.encode('ascii') - msg %= fname - util.nouideprecwarn(msg, b"6.6", stacklevel=2) - return f(object, sysstr(name), *args) - - return w - - # these wrappers are automagically imported by hgloader -delattr = _wrapattrfunc(builtins.delattr) -getattr = _wrapattrfunc(builtins.getattr) -hasattr = _wrapattrfunc(builtins.hasattr) -setattr = _wrapattrfunc(builtins.setattr) +delattr = builtins.delattr +getattr = builtins.getattr +hasattr = builtins.hasattr +setattr = builtins.setattr xrange = builtins.range unicode = str @@ -392,7 +376,7 @@ return builtins.open(name, sysstr(mode), buffering, encoding) -safehasattr = _wrapattrfunc(builtins.hasattr) +safehasattr = builtins.hasattr def _getoptbwrapper(
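With the _wrapattrfunc shims deleted, getattr, setattr, hasattr, delattr and safehasattr are the plain builtins again, so a bytes attribute name now fails with TypeError immediately instead of first emitting the 6.6 deprecation warning:

    class Demo:
        spam = 1

    assert getattr(Demo, 'spam') == 1
    try:
        getattr(Demo, b'spam')  # builtins only accept str attribute names
    except TypeError:
        pass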
--- a/mercurial/registrar.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/registrar.py Mon Feb 12 16:22:47 2024 +0100 @@ -524,7 +524,7 @@ precheck=None, binary=False, symlink=False, - ): + ): # pytype: disable=signature-mismatch func.mergetype = mergetype func.onfailure = onfailure func.precheck = precheck
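The trailing comment is pytype's inline suppression syntax: # pytype: disable=<error-class> on a code line silences that one diagnostic at that point rather than globally. The same mechanism on a toy override, unrelated to registrar:

    class Base:
        def run(self, x: int, y: int) -> int:
            return x + y

    class Child(Base):
        # the narrower override is intentional, so a one-line suppression
        # keeps pytype quiet without disabling the check for the whole file
        def run(self, x: int) -> int:  # pytype: disable=signature-mismatch
            return x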
--- a/mercurial/repoview.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/repoview.py Mon Feb 12 16:22:47 2024 +0100 @@ -305,6 +305,10 @@ raise error.FilteredIndexError(rev) return revs + def _head_node_ids(self): + # no Rust fast path implemented yet, so just loop in Python + return [self.node(r) for r in self.headrevs()] + def headrevs(self, revs=None): if revs is None: try:
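This filtered-changelog _head_node_ids and the revlog method added later in this changeset share a convention: use a native index method when the index provides one, otherwise resolve head revisions to nodes in Python. A generic sketch of that dispatch, with illustrative names:

    def head_node_ids(index, headrevs, node):
        try:
            # Rust-backed indexes can expose a batched native lookup
            return index.head_node_ids()
        except AttributeError:
            # pure-Python fallback, one node lookup per head revision
            return [node(r) for r in headrevs()]

    class _PyIndex:
        pass  # no native fast path

    assert head_node_ids(_PyIndex(), lambda: [0, 2], lambda r: b'n%d' % r) == [
        b'n0',
        b'n2',
    ]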
--- a/mercurial/revlog.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/revlog.py Mon Feb 12 16:22:47 2024 +0100 @@ -16,6 +16,7 @@ import binascii import collections import contextlib +import functools import io import os import struct @@ -224,9 +225,9 @@ parse_index_v1_nodemap = None -def parse_index_v1_mixed(data, inline): - index, cache = parse_index_v1(data, inline) - return rustrevlog.MixedIndex(index), cache +def parse_index_v1_rust(data, inline, default_header): + cache = (0, data) if inline else None + return rustrevlog.Index(data, default_header), cache # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte @@ -367,7 +368,7 @@ self.opener = opener self.index = index - self.__index_file = index_file + self.index_file = index_file self.data_file = data_file self.sidedata_file = sidedata_file self.inline = inline @@ -416,16 +417,6 @@ self._delay_buffer = None - @property - def index_file(self): - return self.__index_file - - @index_file.setter - def index_file(self, new_index_file): - self.__index_file = new_index_file - if self.inline: - self._segmentfile.filename = new_index_file - def __len__(self): return len(self.index) @@ -652,6 +643,9 @@ """Context manager that keeps data and sidedata files open for reading""" if len(self.index) == 0: yield # nothing to be read + elif self._delay_buffer is not None and self.inline: + msg = "revlog with delayed write should not be inline" + raise error.ProgrammingError(msg) else: with self._segmentfile.reading(): with self._segmentfile_sidedata.reading(): @@ -778,7 +772,6 @@ self.index_file, mode=b"w", checkambig=self.data_config.check_ambig, - atomictemp=True, ) def split_inline(self, tr, header, new_index_file_path=None): @@ -1137,18 +1130,16 @@ ifh.write(entry) else: self._delay_buffer.append(entry) + elif self._delay_buffer is not None: + msg = b'invalid delayed write on inline revlog' + raise error.ProgrammingError(msg) else: offset += curr * self.index.entry_size transaction.add(self.canonical_index_file, offset) assert not sidedata - if self._delay_buffer is None: - ifh.write(entry) - ifh.write(data[0]) - ifh.write(data[1]) - else: - self._delay_buffer.append(entry) - self._delay_buffer.append(data[0]) - self._delay_buffer.append(data[1]) + ifh.write(entry) + ifh.write(data[0]) + ifh.write(data[1]) return ( ifh.tell(), dfh.tell() if dfh else None, @@ -1160,6 +1151,9 @@ def delay(self): assert not self.is_open + if self.inline: + msg = "revlog with delayed write should not be inline" + raise error.ProgrammingError(msg) if self._delay_buffer is not None or self._orig_index_file is not None: # delay or divert already in place return None @@ -1173,12 +1167,13 @@ return self.index_file else: self._delay_buffer = [] - if self.inline: - self._segmentfile._delay_buffer = self._delay_buffer return None def write_pending(self): assert not self.is_open + if self.inline: + msg = "revlog with delayed write should not be inline" + raise error.ProgrammingError(msg) if self._orig_index_file is not None: return None, True any_pending = False @@ -1195,16 +1190,15 @@ ifh.write(b"".join(self._delay_buffer)) any_pending = True self._delay_buffer = None - if self.inline: - self._segmentfile._delay_buffer = self._delay_buffer - else: - assert self._segmentfile._delay_buffer is None self._orig_index_file = self.index_file self.index_file = pending_index_file return self.index_file, any_pending def finalize_pending(self): assert not self.is_open + if self.inline: + msg = "revlog with delayed write should not be inline" + raise 
error.ProgrammingError(msg) delay = self._delay_buffer is not None divert = self._orig_index_file is not None @@ -1216,7 +1210,7 @@ with self.opener(self.index_file, b'r+') as ifh: ifh.seek(0, os.SEEK_END) ifh.write(b"".join(self._delay_buffer)) - self._segmentfile._delay_buffer = self._delay_buffer = None + self._delay_buffer = None elif divert: if self.opener.exists(self.index_file): self.opener.rename( @@ -1391,194 +1385,6 @@ self._load_inner(chunk_cache) self._concurrencychecker = concurrencychecker - @property - def _generaldelta(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2 - ) - return self.delta_config.general_delta - - @property - def _checkambig(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.data_config.checkambig", b"6.6", stacklevel=2 - ) - return self.data_config.check_ambig - - @property - def _mmaplargeindex(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2 - ) - return self.data_config.mmap_large_index - - @property - def _censorable(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.feature_config.censorable", b"6.6", stacklevel=2 - ) - return self.feature_config.censorable - - @property - def _chunkcachesize(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2 - ) - return self.data_config.chunk_cache_size - - @property - def _maxchainlen(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2 - ) - return self.delta_config.max_chain_len - - @property - def _deltabothparents(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2 - ) - return self.delta_config.delta_both_parents - - @property - def _candidate_group_chunk_size(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.candidate_group_chunk_size", - b"6.6", - stacklevel=2, - ) - return self.delta_config.candidate_group_chunk_size - - @property - def _debug_delta(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2 - ) - return self.delta_config.debug_delta - - @property - def _compengine(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.feature_config.compression_engine", - b"6.6", - stacklevel=2, - ) - return self.feature_config.compression_engine - - @property - def upperboundcomp(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.upper_bound_comp", - b"6.6", - stacklevel=2, - ) - return self.delta_config.upper_bound_comp - - @property - def _compengineopts(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.feature_config.compression_engine_options", - b"6.6", - stacklevel=2, - ) - return self.feature_config.compression_engine_options - - @property - def _maxdeltachainspan(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2 - ) - return self.delta_config.max_deltachain_span - - @property - def _withsparseread(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.data_config.with_sparse_read", 
b"6.6", stacklevel=2 - ) - return self.data_config.with_sparse_read - - @property - def _sparserevlog(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2 - ) - return self.delta_config.sparse_revlog - - @property - def hassidedata(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2 - ) - return self.feature_config.has_side_data - - @property - def _srdensitythreshold(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.data_config.sr_density_threshold", - b"6.6", - stacklevel=2, - ) - return self.data_config.sr_density_threshold - - @property - def _srmingapsize(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2 - ) - return self.data_config.sr_min_gap_size - - @property - def _compute_rank(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2 - ) - return self.feature_config.compute_rank - - @property - def canonical_parent_order(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.feature_config.canonical_parent_order", - b"6.6", - stacklevel=2, - ) - return self.feature_config.canonical_parent_order - - @property - def _lazydelta(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2 - ) - return self.delta_config.lazy_delta - - @property - def _lazydeltabase(self): - """temporary compatibility proxy""" - util.nouideprecwarn( - b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2 - ) - return self.delta_config.lazy_delta_base - def _init_opts(self): """process options (from above/config) to setup associated default revlog mode @@ -1876,7 +1682,10 @@ if self._nodemap_file is not None: use_rust_index = True else: - use_rust_index = self.opener.options.get(b'rust.index') + # Using the CIndex is not longer possible, as the + # `AncestorsIterator` and `LazyAncestors` classes now require + # a Rust index for instantiation. + use_rust_index = True self._parse_index = parse_index_v1 if self._format_version == REVLOGV0: @@ -1888,7 +1697,9 @@ elif devel_nodemap: self._parse_index = parse_index_v1_nodemap elif use_rust_index: - self._parse_index = parse_index_v1_mixed + self._parse_index = functools.partial( + parse_index_v1_rust, default_header=new_header + ) try: d = self._parse_index(index_data, self._inline) index, chunkcache = d @@ -2550,6 +2361,12 @@ ishead[e[5]] = ishead[e[6]] = 0 # my parent are not return [r for r, val in enumerate(ishead) if val] + def _head_node_ids(self): + try: + return self.index.head_node_ids() + except AttributeError: + return [self.node(r) for r in self.headrevs()] + def heads(self, start=None, stop=None): """return the list of all nodes that have no children @@ -2561,8 +2378,7 @@ if start is None and stop is None: if not len(self): return [self.nullid] - return [self.node(r) for r in self.headrevs()] - + return self._head_node_ids() if start is None: start = nullrev else: @@ -3010,7 +2826,7 @@ # manifest), no risk of collision. return self.radix + b'.i.s' - def _enforceinlinesize(self, tr, side_write=True): + def _enforceinlinesize(self, tr): """Check if the revlog is too big for inline and convert if so. This should be called after revisions are added to the revlog. 
If the @@ -3019,56 +2835,59 @@ """ tiprev = len(self) - 1 total_size = self.start(tiprev) + self.length(tiprev) - if not self._inline or total_size < _maxinline: + if not self._inline or (self._may_inline and total_size < _maxinline): return if self._docket is not None: msg = b"inline revlog should not have a docket" raise error.ProgrammingError(msg) + # In the common case, we enforce inline size because the revlog has + # been appened too. And in such case, it must have an initial offset + # recorded in the transaction. troffset = tr.findoffset(self._inner.canonical_index_file) - if troffset is None: + pre_touched = troffset is not None + if not pre_touched and self.target[0] != KIND_CHANGELOG: raise error.RevlogError( _(b"%s not found in the transaction") % self._indexfile ) - if troffset: - tr.addbackup(self._inner.canonical_index_file, for_offset=True) + + tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched) tr.add(self._datafile, 0) new_index_file_path = None - if side_write: - old_index_file_path = self._indexfile - new_index_file_path = self._split_index_file - opener = self.opener - weak_self = weakref.ref(self) - - # the "split" index replace the real index when the transaction is - # finalized - def finalize_callback(tr): - opener.rename( - new_index_file_path, - old_index_file_path, - checkambig=True, - ) - maybe_self = weak_self() - if maybe_self is not None: - maybe_self._indexfile = old_index_file_path - maybe_self._inner.index_file = maybe_self._indexfile - - def abort_callback(tr): - maybe_self = weak_self() - if maybe_self is not None: - maybe_self._indexfile = old_index_file_path - maybe_self._inner.inline = True - maybe_self._inner.index_file = old_index_file_path - - tr.registertmp(new_index_file_path) - if self.target[1] is not None: - callback_id = b'000-revlog-split-%d-%s' % self.target - else: - callback_id = b'000-revlog-split-%d' % self.target[0] - tr.addfinalize(callback_id, finalize_callback) - tr.addabort(callback_id, abort_callback) + old_index_file_path = self._indexfile + new_index_file_path = self._split_index_file + opener = self.opener + weak_self = weakref.ref(self) + + # the "split" index replace the real index when the transaction is + # finalized + def finalize_callback(tr): + opener.rename( + new_index_file_path, + old_index_file_path, + checkambig=True, + ) + maybe_self = weak_self() + if maybe_self is not None: + maybe_self._indexfile = old_index_file_path + maybe_self._inner.index_file = maybe_self._indexfile + + def abort_callback(tr): + maybe_self = weak_self() + if maybe_self is not None: + maybe_self._indexfile = old_index_file_path + maybe_self._inner.inline = True + maybe_self._inner.index_file = old_index_file_path + + tr.registertmp(new_index_file_path) + if self.target[1] is not None: + callback_id = b'000-revlog-split-%d-%s' % self.target + else: + callback_id = b'000-revlog-split-%d' % self.target[0] + tr.addfinalize(callback_id, finalize_callback) + tr.addabort(callback_id, abort_callback) self._format_flags &= ~FLAG_INLINE_DATA self._inner.split_inline( @@ -4014,16 +3833,16 @@ if addrevisioncb: addrevisioncb(self, rev, node) - def censorrevision(self, tr, censornode, tombstone=b''): + def censorrevision(self, tr, censor_nodes, tombstone=b''): if self._format_version == REVLOGV0: raise error.RevlogError( _(b'cannot censor with version %d revlogs') % self._format_version ) elif self._format_version == REVLOGV1: - rewrite.v1_censor(self, tr, censornode, tombstone) + rewrite.v1_censor(self, tr, censor_nodes, tombstone) 
else: - rewrite.v2_censor(self, tr, censornode, tombstone) + rewrite.v2_censor(self, tr, censor_nodes, tombstone) def verifyintegrity(self, state): """Verifies the integrity of the revlog.
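Several revlog.py hunks above add the same guard: once writes are delayed or diverted, the revlog must already have been split out of inline mode, and hitting the combination is a programming error. A toy model of that invariant, with ProgrammingError standing in for mercurial.error.ProgrammingError:

    class ProgrammingError(Exception):
        pass

    class ToyRevlogWriter:
        def __init__(self, inline: bool):
            self.inline = inline
            self._delay_buffer = None

        def delay(self):
            # invariant added by this changeset: no delayed writes on an
            # inline revlog, whose index and data share a single file
            if self.inline:
                raise ProgrammingError(
                    "revlog with delayed write should not be inline")
            self._delay_buffer = []

    ToyRevlogWriter(inline=False).delay()  # fine
    try:
        ToyRevlogWriter(inline=True).delay()
    except ProgrammingError:
        pass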
--- a/mercurial/revlogutils/__init__.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/revlogutils/__init__.py Mon Feb 12 16:22:47 2024 +0100 @@ -65,7 +65,7 @@ class revisioninfo: """Information about a revision that allows building its fulltext node: expected hash of the revision - p1, p2: parent revs of the revision + p1, p2: parents of the revision (as nodes) btext: built text cache consisting of a one-element list cachedelta: (baserev, uncompressed_delta, usage_mode) or None flags: flags associated to the revision storage
--- a/mercurial/revlogutils/deltas.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/revlogutils/deltas.py Mon Feb 12 16:22:47 2024 +0100 @@ -8,6 +8,7 @@ """Helper class to compute deltas stored inside revlogs""" +import abc import collections import struct @@ -584,395 +585,574 @@ ) -def is_good_delta_info(revlog, deltainfo, revinfo): - """Returns True if the given delta is good. Good means that it is within - the disk span, disk size, and chain length bounds that we know to be - performant.""" - if deltainfo is None: - return False - - # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so - # we should never end up asking such question. Adding the assert as a - # safe-guard to detect anything that would be fishy in this regard. - assert ( - revinfo.cachedelta is None - or revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE - or not revlog.delta_config.general_delta - ) - - # - 'deltainfo.distance' is the distance from the base revision -- - # bounding it limits the amount of I/O we need to do. - # - 'deltainfo.compresseddeltalen' is the sum of the total size of - # deltas we need to apply -- bounding it limits the amount of CPU - # we consume. - - textlen = revinfo.textlen - defaultmax = textlen * 4 - maxdist = revlog.delta_config.max_deltachain_span - if not maxdist: - maxdist = deltainfo.distance # ensure the conditional pass - maxdist = max(maxdist, defaultmax) - - # Bad delta from read span: - # - # If the span of data read is larger than the maximum allowed. - # - # In the sparse-revlog case, we rely on the associated "sparse reading" - # to avoid issue related to the span of data. In theory, it would be - # possible to build pathological revlog where delta pattern would lead - # to too many reads. However, they do not happen in practice at all. So - # we skip the span check entirely. - if not revlog.delta_config.sparse_revlog and maxdist < deltainfo.distance: - return False - - # Bad delta from new delta size: - # - # If the delta size is larger than the target text, storing the - # delta will be inefficient. - if textlen < deltainfo.deltalen: - return False - - # Bad delta from cumulated payload size: - # - # If the sum of delta get larger than K * target text length. - if textlen * LIMIT_DELTA2TEXT < deltainfo.compresseddeltalen: - return False - - # Bad delta from chain length: - # - # If the number of delta in the chain gets too high. - if ( - revlog.delta_config.max_chain_len - and revlog.delta_config.max_chain_len < deltainfo.chainlen - ): - return False - - # bad delta from intermediate snapshot size limit - # - # If an intermediate snapshot size is higher than the limit. The - # limit exist to prevent endless chain of intermediate delta to be - # created. - if ( - deltainfo.snapshotdepth is not None - and (textlen >> deltainfo.snapshotdepth) < deltainfo.deltalen - ): - return False - - # bad delta if new intermediate snapshot is larger than the previous - # snapshot - if ( - deltainfo.snapshotdepth - and revlog.length(deltainfo.base) < deltainfo.deltalen - ): - return False - - return True - - # If a revision's full text is that much bigger than a base candidate full # text's, it is very unlikely that it will produce a valid delta. We no longer # consider these candidates. 
LIMIT_BASE2TEXT = 500 - -def _candidategroups( - revlog, - textlen, - p1, - p2, - cachedelta, - excluded_bases=None, - target_rev=None, - snapshot_cache=None, -): - """Provides group of revision to be tested as delta base - - This top level function focus on emitting groups with unique and worthwhile - content. See _raw_candidate_groups for details about the group order. - """ - # should we try to build a delta? - if not (len(revlog) and revlog._storedeltachains): - yield None - return - - if target_rev is None: - target_rev = len(revlog) +### stage of the search, used for debug and to select and to adjust some logic. +# initial stage, next step is unknown +_STAGE_UNSPECIFIED = "unspecified" +# trying the cached delta +_STAGE_CACHED = "cached" +# trying delta based on parents +_STAGE_PARENTS = "parents" +# trying to build a valid snapshot of any level +_STAGE_SNAPSHOT = "snapshot" +# trying to build a delta based of the previous revision +_STAGE_PREV = "prev" +# trying to build a full snapshot +_STAGE_FULL = "full" - if not revlog.delta_config.general_delta: - # before general delta, there is only one possible delta base - yield (target_rev - 1,) - yield None - return + +class _BaseDeltaSearch(abc.ABC): + """perform the search of a good delta for a single revlog revision - # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner so - # we should never end up asking such question. Adding the assert as a - # safe-guard to detect anything that would be fishy in this regard. - assert ( - cachedelta is None - or cachedelta[2] != DELTA_BASE_REUSE_FORCE - or not revlog.delta_config.general_delta - ) + note: some of the deltacomputer.finddeltainfo logic should probably move + here. + """ - deltalength = revlog.length - deltaparent = revlog.deltaparent - sparse = revlog.delta_config.sparse_revlog - good = None - - deltas_limit = textlen * LIMIT_DELTA2TEXT - group_chunk_size = revlog.delta_config.candidate_group_chunk_size - - tested = {nullrev} - candidates = _refinedgroups( + def __init__( + self, revlog, + revinfo, p1, p2, cachedelta, - snapshot_cache=snapshot_cache, - ) - while True: - temptative = candidates.send(good) - if temptative is None: - break + excluded_bases=None, + target_rev=None, + snapshot_cache=None, + ): + # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner + # so we should never end up asking such question. Adding the assert as + # a safe-guard to detect anything that would be fishy in this regard. + assert ( + cachedelta is None + or cachedelta[2] != DELTA_BASE_REUSE_FORCE + or not revlog.delta_config.general_delta + ) + self.revlog = revlog + self.revinfo = revinfo + self.textlen = revinfo.textlen + self.p1 = p1 + self.p2 = p2 + self.cachedelta = cachedelta + self.excluded_bases = excluded_bases + if target_rev is None: + self.target_rev = len(self.revlog) + self.target_rev = target_rev + if snapshot_cache is None: + # map: base-rev: [snapshot-revs] + snapshot_cache = SnapshotCache() + self.snapshot_cache = snapshot_cache + + self.tested = {nullrev} + + self.current_stage = _STAGE_UNSPECIFIED + self.current_group = None + self._init_group() + + def is_good_delta_info(self, deltainfo): + """Returns True if the given delta is good. + + Good means that it is within the disk span, disk size, and chain length + bounds that we know to be performant. 
+ """ + if not self._is_good_delta_info_universal(deltainfo): + return False + if not self._is_good_delta_info_chain_quality(deltainfo): + return False + return True + + def _is_good_delta_info_universal(self, deltainfo): + """Returns True if the given delta is good. + + This performs generic checks needed by all format variants. + + This is used by is_good_delta_info. + """ + + if deltainfo is None: + return False + + # the DELTA_BASE_REUSE_FORCE case should have been taken care of sooner + # so we should never end up asking such question. Adding the assert as + # a safe-guard to detect anything that would be fishy in this regard. + assert ( + self.revinfo.cachedelta is None + or self.revinfo.cachedelta[2] != DELTA_BASE_REUSE_FORCE + or not self.revlog.delta_config.general_delta + ) + + # Bad delta from new delta size: + # + # If the delta size is larger than the target text, storing the delta + # will be inefficient. + if self.revinfo.textlen < deltainfo.deltalen: + return False + + return True + + def _is_good_delta_info_chain_quality(self, deltainfo): + """Returns True if the chain associated with the delta is good. + + This performs checks for format that use delta chains. + + This is used by is_good_delta_info. + """ + # - 'deltainfo.distance' is the distance from the base revision -- + # bounding it limits the amount of I/O we need to do. + + defaultmax = self.revinfo.textlen * 4 + maxdist = self.revlog.delta_config.max_deltachain_span + if not maxdist: + maxdist = deltainfo.distance # ensure the conditional pass + maxdist = max(maxdist, defaultmax) + + # Bad delta from read span: + # + # If the span of data read is larger than the maximum allowed. + # + # In the sparse-revlog case, we rely on the associated "sparse + # reading" to avoid issue related to the span of data. In theory, it + # would be possible to build pathological revlog where delta pattern + # would lead to too many reads. However, they do not happen in + # practice at all. So we skip the span check entirely. + if ( + not self.revlog.delta_config.sparse_revlog + and maxdist < deltainfo.distance + ): + return False + + # Bad delta from cumulated payload size: + # + # - 'deltainfo.compresseddeltalen' is the sum of the total size of + # deltas we need to apply -- bounding it limits the amount of CPU + # we consume. + max_chain_data = self.revinfo.textlen * LIMIT_DELTA2TEXT + # If the sum of delta get larger than K * target text length. + if max_chain_data < deltainfo.compresseddeltalen: + return False + + # Bad delta from chain length: + # + # If the number of delta in the chain gets too high. + if ( + self.revlog.delta_config.max_chain_len + and self.revlog.delta_config.max_chain_len < deltainfo.chainlen + ): + return False + return True + + @property + def done(self): + """True when all possible candidate have been tested""" + return self.current_group is None + + @abc.abstractmethod + def next_group(self, good_delta=None): + """move to the next group to test + + The group of revision to test will be available in + `self.current_group`. If the previous group had any good delta, the + best one can be passed as the `good_delta` parameter to help selecting + the next group. + + If not revision remains to be, `self.done` will be True and + `self.current_group` will be None. + """ + pass + + @abc.abstractmethod + def _init_group(self): + pass + + +class _NoDeltaSearch(_BaseDeltaSearch): + """Search for no delta. + + This search variant is to be used in case where we should not store delta. 
+ """ + + def _init_group(self): + self.current_stage = _STAGE_FULL + + def next_group(self, good_delta=None): + pass + + +class _PrevDeltaSearch(_BaseDeltaSearch): + """Search for delta against the previous revision only + + This search variant is to be used when the format does not allow for delta + against arbitrary bases. + """ + + def _init_group(self): + self.current_stage = _STAGE_PREV + self.current_group = [self.target_rev - 1] + self.tested.update(self.current_group) + + def next_group(self, good_delta=None): + self.current_stage = _STAGE_FULL + self.current_group = None + + +class _GeneralDeltaSearch(_BaseDeltaSearch): + """Delta search variant for general-delta repository""" + + def _init_group(self): + # Why search for delta base if we cannot use a delta base ? + # also see issue6056 + assert self.revlog.delta_config.general_delta + self._candidates_iterator = self._iter_groups() + self._last_good = None + if ( + self.cachedelta is not None + and self.cachedelta[2] > DELTA_BASE_REUSE_NO + and self._pre_filter_rev(self.cachedelta[0]) + ): + # First we try to reuse a the delta contained in the bundle. (or from + # the source revlog) + # + # This logic only applies to general delta repositories and can be + # disabled through configuration. Disabling reuse source delta is + # useful when we want to make sure we recomputed "optimal" deltas. + self.current_stage = _STAGE_CACHED + self._internal_group = (self.cachedelta[0],) + self._internal_idx = 0 + self.current_group = self._internal_group + self.tested.update(self.current_group) + else: + self._next_internal_group() + + def _next_internal_group(self): + # self._internal_group can be larger than self.current_group + self._internal_idx = 0 + group = self._candidates_iterator.send(self._last_good) + if group is not None: + group = self._pre_filter_candidate_revs(group) + self._internal_group = group + if self._internal_group is None: + self.current_group = None + elif len(self._internal_group) == 0: + self.next_group() + else: + chunk_size = self.revlog.delta_config.candidate_group_chunk_size + if chunk_size > 0: + self.current_group = self._internal_group[:chunk_size] + self._internal_idx += chunk_size + else: + self.current_group = self._internal_group + self._internal_idx += len(self.current_group) + + self.tested.update(self.current_group) + + def next_group(self, good_delta=None): + old_good = self._last_good + if good_delta is not None: + self._last_good = good_delta + if self.current_stage == _STAGE_CACHED and good_delta is not None: + # the cache is good, let us use the cache as requested + self._candidates_iterator = None + self._internal_group = None + self._internal_idx = None + self.current_group = None + return + + if (self._internal_idx < len(self._internal_group)) and ( + old_good != good_delta + ): + # When the size of the candidate group is big, it can result in + # a quite significant performance impact. To reduce this, we + # can send them in smaller batches until the new batch does not + # provide any improvements. + # + # This might reduce the overall efficiency of the compression + # in some corner cases, but that should also prevent very + # pathological cases from being an issue. (eg. 20 000 + # candidates). + # + # XXX note that the ordering of the group becomes important as + # it now impacts the final result. The current order is + # unprocessed and can be improved. 
+ next_idx = self._internal_idx + self._group_chunk_size + self.current_group = self._internal_group[ + self._internal_idx : next_idx + ] + self.tested.update(self.current_group) + self._internal_idx = next_idx + else: + self._next_internal_group() + + def _pre_filter_candidate_revs(self, temptative): + """filter possible candidate before computing a delta + + This function use various criteria to pre-filter candidate delta base + before we compute a delta and evaluate its quality. + + Such pre-filter limit the number of computed delta, an expensive operation. + + return the updated list of revision to test + """ + deltalength = self.revlog.length + deltaparent = self.revlog.deltaparent + + tested = self.tested group = [] for rev in temptative: # skip over empty delta (no need to include them in a chain) while not (rev == nullrev or rev in tested or deltalength(rev)): tested.add(rev) rev = deltaparent(rev) - # no need to try a delta against nullrev, this will be done as a - # last resort. - if rev == nullrev: - continue - # filter out revision we tested already - if rev in tested: - continue + if self._pre_filter_rev(rev): + group.append(rev) + else: + self.tested.add(rev) + return group + + def _pre_filter_rev_universal(self, rev): + """pre filtering that is need in all cases. + + return True if it seems okay to test a rev, False otherwise. + + used by _pre_filter_rev. + """ + # no need to try a delta against nullrev, this will be done as + # a last resort. + if rev == nullrev: + return False + # filter out revision we tested already + if rev in self.tested: + return False - # an higher authority deamed the base unworthy (e.g. censored) - if excluded_bases is not None and rev in excluded_bases: - tested.add(rev) - continue - # We are in some recomputation cases and that rev is too high in - # the revlog - if target_rev is not None and rev >= target_rev: - tested.add(rev) - continue - # filter out delta base that will never produce good delta - if deltas_limit < revlog.length(rev): - tested.add(rev) - continue - if sparse and revlog.rawsize(rev) < (textlen // LIMIT_BASE2TEXT): - tested.add(rev) - continue - # no delta for rawtext-changing revs (see "candelta" for why) - if revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: - tested.add(rev) - continue + # an higher authority deamed the base unworthy (e.g. censored) + if self.excluded_bases is not None and rev in self.excluded_bases: + return False + # We are in some recomputation cases and that rev is too high + # in the revlog + if self.target_rev is not None and rev >= self.target_rev: + return False + # no delta for rawtext-changing revs (see "candelta" for why) + if self.revlog.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS: + return False + return True + + def _pre_filter_rev_delta_chain(self, rev): + """pre filtering that is needed in sparse revlog cases + + return True if it seems okay to test a rev, False otherwise. + + used by _pre_filter_rev. + """ + deltas_limit = self.revinfo.textlen * LIMIT_DELTA2TEXT + # filter out delta base that will never produce good delta + # + # if the delta of that base is already bigger than the limit + # for the delta chain size, doing a delta is hopeless. + if deltas_limit < self.revlog.length(rev): + return False - # If we reach here, we are about to build and test a delta. - # The delta building process will compute the chaininfo in all - # case, since that computation is cached, it is fine to access it - # here too. 
- chainlen, chainsize = revlog._chaininfo(rev) - # if chain will be too long, skip base - if ( - revlog.delta_config.max_chain_len - and chainlen >= revlog.delta_config.max_chain_len - ): - tested.add(rev) - continue - # if chain already have too much data, skip base - if deltas_limit < chainsize: - tested.add(rev) - continue - if sparse and revlog.delta_config.upper_bound_comp is not None: - maxcomp = revlog.delta_config.upper_bound_comp - basenotsnap = (p1, p2, nullrev) - if rev not in basenotsnap and revlog.issnapshot(rev): - snapshotdepth = revlog.snapshotdepth(rev) - # If text is significantly larger than the base, we can - # expect the resulting delta to be proportional to the size - # difference - revsize = revlog.rawsize(rev) - rawsizedistance = max(textlen - revsize, 0) - # use an estimate of the compression upper bound. - lowestrealisticdeltalen = rawsizedistance // maxcomp + # If we reach here, we are about to build and test a delta. + # The delta building process will compute the chaininfo in all + # case, since that computation is cached, it is fine to access + # it here too. + chainlen, chainsize = self.revlog._chaininfo(rev) + # if chain will be too long, skip base + if ( + self.revlog.delta_config.max_chain_len + and chainlen >= self.revlog.delta_config.max_chain_len + ): + return False + # if chain already have too much data, skip base + if deltas_limit < chainsize: + return False + return True + + def _pre_filter_rev(self, rev): + """return True if it seems okay to test a rev, False otherwise""" + if not self._pre_filter_rev_universal(rev): + return False + if not self._pre_filter_rev_delta_chain(rev): + return False + return True + + def _iter_parents(self): + # exclude already lazy tested base if any + parents = [p for p in (self.p1, self.p2) if p != nullrev] - # check the absolute constraint on the delta size - snapshotlimit = textlen >> snapshotdepth - if snapshotlimit < lowestrealisticdeltalen: - # delta lower bound is larger than accepted upper bound - tested.add(rev) - continue - - # check the relative constraint on the delta size - revlength = revlog.length(rev) - if revlength < lowestrealisticdeltalen: - # delta probable lower bound is larger than target base - tested.add(rev) - continue + self.current_stage = _STAGE_PARENTS + if ( + not self.revlog.delta_config.delta_both_parents + and len(parents) == 2 + ): + parents.sort() + # To minimize the chance of having to build a fulltext, + # pick first whichever parent is closest to us (max rev) + yield (parents[1],) + # then the other one (min rev) if the first did not fit + yield (parents[0],) + elif len(parents) > 0: + # Test all parents (1 or 2), and keep the best candidate + yield parents - group.append(rev) - if group: - # When the size of the candidate group is big, it can result in a - # quite significant performance impact. To reduce this, we can send - # them in smaller batches until the new batch does not provide any - # improvements. - # - # This might reduce the overall efficiency of the compression in - # some corner cases, but that should also prevent very pathological - # cases from being an issue. (eg. 20 000 candidates). - # - # XXX note that the ordering of the group becomes important as it - # now impacts the final result. The current order is unprocessed - # and can be improved. 
- if group_chunk_size == 0: - tested.update(group) - good = yield tuple(group) - else: - prev_good = good - for start in range(0, len(group), group_chunk_size): - sub_group = group[start : start + group_chunk_size] - tested.update(sub_group) - good = yield tuple(sub_group) - if prev_good == good: - break + def _iter_prev(self): + # other approach failed try against prev to hopefully save us a + # fulltext. + self.current_stage = _STAGE_PREV + yield (self.target_rev - 1,) - yield None + def _iter_groups(self): + good = None + for group in self._iter_parents(): + good = yield group + if good is not None: + break + else: + assert good is None + yield from self._iter_prev() + yield None -def _refinedgroups(revlog, p1, p2, cachedelta, snapshot_cache=None): - good = None - # First we try to reuse a the delta contained in the bundle. - # (or from the source revlog) - # - # This logic only applies to general delta repositories and can be disabled - # through configuration. Disabling reuse source delta is useful when - # we want to make sure we recomputed "optimal" deltas. - debug_info = None - if cachedelta is not None and cachedelta[2] > DELTA_BASE_REUSE_NO: - # Assume what we received from the server is a good choice - # build delta will reuse the cache - if debug_info is not None: - debug_info['cached-delta.tested'] += 1 - good = yield (cachedelta[0],) - if good is not None: - if debug_info is not None: - debug_info['cached-delta.accepted'] += 1 - yield None - return - if snapshot_cache is None: - snapshot_cache = SnapshotCache() - groups = _rawgroups( - revlog, - p1, - p2, - cachedelta, - snapshot_cache, - ) - for candidates in groups: - good = yield candidates - if good is not None: - break +class _SparseDeltaSearch(_GeneralDeltaSearch): + """Delta search variants for sparse-revlog""" + + def is_good_delta_info(self, deltainfo): + """Returns True if the given delta is good. + + Good means that it is within the disk span, disk size, and chain length + bounds that we know to be performant. + """ + if not self._is_good_delta_info_universal(deltainfo): + return False + if not self._is_good_delta_info_chain_quality(deltainfo): + return False + if not self._is_good_delta_info_snapshot_constraints(deltainfo): + return False + return True + + def _is_good_delta_info_snapshot_constraints(self, deltainfo): + """Returns True if the chain associated with snapshots + + This performs checks for format that use sparse-revlog and intermediate + snapshots. - # If sparse revlog is enabled, we can try to refine the available deltas - if not revlog.delta_config.sparse_revlog: - yield None - return + This is used by is_good_delta_info. + """ + # if not a snapshot, this method has no filtering to do + if deltainfo.snapshotdepth is None: + return True + # bad delta from intermediate snapshot size limit + # + # If an intermediate snapshot size is higher than the limit. The + # limit exist to prevent endless chain of intermediate delta to be + # created. 
+ if ( + self.revinfo.textlen >> deltainfo.snapshotdepth + ) < deltainfo.deltalen: + return False + + # bad delta if new intermediate snapshot is larger than the previous + # snapshot + if self.revlog.length(deltainfo.base) < deltainfo.deltalen: + return False + + return True + + def _pre_filter_rev(self, rev): + """return True if it seems okay to test a rev, False otherwise""" + if not self._pre_filter_rev_universal(rev): + return False + if not self._pre_filter_rev_delta_chain(rev): + return False + if not self._pre_filter_rev_sparse(rev): + return False + return True - # if we have a refinable value, try to refine it - if good is not None and good not in (p1, p2) and revlog.issnapshot(good): - # refine snapshot down - previous = None - while previous != good: - previous = good - base = revlog.deltaparent(good) - if base == nullrev: - break - good = yield (base,) - # refine snapshot up - if not snapshot_cache.snapshots: - snapshot_cache.update(revlog, good + 1) - previous = None - while good != previous: - previous = good - children = tuple(sorted(c for c in snapshot_cache.snapshots[good])) - good = yield children + def _pre_filter_rev_sparse(self, rev): + """pre filtering that is needed in sparse revlog cases + + return True if it seems okay to test a rev, False otherwise. + + used by _pre_filter_rev. + """ + assert self.revlog.delta_config.sparse_revlog + # if the revision we test again is too small, the resulting delta + # will be large anyway as that amount of data to be added is big + if self.revlog.rawsize(rev) < (self.textlen // LIMIT_BASE2TEXT): + return False - if debug_info is not None: - if good is None: - debug_info['no-solution'] += 1 - - yield None - - -def _rawgroups(revlog, p1, p2, cachedelta, snapshot_cache=None): - """Provides group of revision to be tested as delta base - - This lower level function focus on emitting delta theorically interresting - without looking it any practical details. + if self.revlog.delta_config.upper_bound_comp is not None: + maxcomp = self.revlog.delta_config.upper_bound_comp + basenotsnap = (self.p1, self.p2, nullrev) + if rev not in basenotsnap and self.revlog.issnapshot(rev): + snapshotdepth = self.revlog.snapshotdepth(rev) + # If text is significantly larger than the base, we can + # expect the resulting delta to be proportional to the size + # difference + revsize = self.revlog.rawsize(rev) + rawsizedistance = max(self.textlen - revsize, 0) + # use an estimate of the compression upper bound. + lowestrealisticdeltalen = rawsizedistance // maxcomp - The group order aims at providing fast or small candidates first. - """ - # Why search for delta base if we cannot use a delta base ? 
- assert revlog.delta_config.general_delta - # also see issue6056 - sparse = revlog.delta_config.sparse_revlog - curr = len(revlog) - prev = curr - 1 - deltachain = lambda rev: revlog._deltachain(rev)[0] + # check the absolute constraint on the delta size + snapshotlimit = self.textlen >> snapshotdepth + if snapshotlimit < lowestrealisticdeltalen: + # delta lower bound is larger than accepted upper + # bound + return False - # exclude already lazy tested base if any - parents = [p for p in (p1, p2) if p != nullrev] + # check the relative constraint on the delta size + revlength = self.revlog.length(rev) + if revlength < lowestrealisticdeltalen: + # delta probable lower bound is larger than target + # base + return False + return True - if not revlog.delta_config.delta_both_parents and len(parents) == 2: - parents.sort() - # To minimize the chance of having to build a fulltext, - # pick first whichever parent is closest to us (max rev) - yield (parents[1],) - # then the other one (min rev) if the first did not fit - yield (parents[0],) - elif len(parents) > 0: - # Test all parents (1 or 2), and keep the best candidate - yield parents + def _iter_snapshots_base(self): + assert self.revlog.delta_config.sparse_revlog + assert self.current_stage == _STAGE_SNAPSHOT + prev = self.target_rev - 1 + deltachain = lambda rev: self.revlog._deltachain(rev)[0] - if sparse and parents: - if snapshot_cache is None: - # map: base-rev: [snapshot-revs] - snapshot_cache = SnapshotCache() - # See if we can use an existing snapshot in the parent chains to use as - # a base for a new intermediate-snapshot + parents = [p for p in (self.p1, self.p2) if p != nullrev] + if not parents: + return + # See if we can use an existing snapshot in the parent chains to + # use as a base for a new intermediate-snapshot # - # search for snapshot in parents delta chain - # map: snapshot-level: snapshot-rev + # search for snapshot in parents delta chain map: snapshot-level: + # snapshot-rev parents_snaps = collections.defaultdict(set) candidate_chains = [deltachain(p) for p in parents] for chain in candidate_chains: for idx, s in enumerate(chain): - if not revlog.issnapshot(s): + if not self.revlog.issnapshot(s): break parents_snaps[idx].add(s) snapfloor = min(parents_snaps[0]) + 1 - snapshot_cache.update(revlog, snapfloor) + self.snapshot_cache.update(self.revlog, snapfloor) # search for the highest "unrelated" revision # # Adding snapshots used by "unrelated" revision increase the odd we # reuse an independant, yet better snapshot chain. # - # XXX instead of building a set of revisions, we could lazily enumerate - # over the chains. That would be more efficient, however we stick to - # simple code for now. + # XXX instead of building a set of revisions, we could lazily + # enumerate over the chains. That would be more efficient, however + # we stick to simple code for now. all_revs = set() for chain in candidate_chains: all_revs.update(chain) other = None - for r in revlog.revs(prev, snapfloor): + for r in self.revlog.revs(prev, snapfloor): if r not in all_revs: other = r break if other is not None: - # To avoid unfair competition, we won't use unrelated intermediate - # snapshot that are deeper than the ones from the parent delta - # chain. + # To avoid unfair competition, we won't use unrelated + # intermediate snapshot that are deeper than the ones from the + # parent delta chain. 
            max_depth = max(parents_snaps.keys())
            chain = deltachain(other)
            for depth, s in enumerate(chain):
@@ -980,20 +1160,20 @@
                     continue
                 if max_depth < depth:
                     break
-                if not revlog.issnapshot(s):
+                if not self.revlog.issnapshot(s):
                     break
                 parents_snaps[depth].add(s)
-        # Test them as possible intermediate snapshot base
-        # We test them from highest to lowest level. High level one are more
-        # likely to result in small delta
+        # Test them as possible intermediate snapshot bases. We test them
+        # from highest to lowest level. High level ones are more likely to
+        # result in small deltas.
        floor = None
        for idx, snaps in sorted(parents_snaps.items(), reverse=True):
            siblings = set()
            for s in snaps:
-                siblings.update(snapshot_cache.snapshots[s])
-            # Before considering making a new intermediate snapshot, we check
-            # if an existing snapshot, children of base we consider, would be
-            # suitable.
+                siblings.update(self.snapshot_cache.snapshots[s])
+            # Before considering making a new intermediate snapshot, we
+            # check if an existing snapshot, a child of the base we consider,
+            # would be suitable.
            #
            # It give a change to reuse a delta chain "unrelated" to the
            # current revision instead of starting our own. Without such
@@ -1002,29 +1182,76 @@

            if floor is not None:
                # We only do this for siblings created after the one in our
-                # parent's delta chain. Those created before has less chances
-                # to be valid base since our ancestors had to create a new
-                # snapshot.
+                # parent's delta chain. Those created before have fewer
+                # chances to be a valid base since our ancestors had to
+                # create a new snapshot.
                siblings = [r for r in siblings if floor < r]
            yield tuple(sorted(siblings))
            # then test the base from our parent's delta chain.
            yield tuple(sorted(snaps))
            floor = min(snaps)
        # No suitable base found in the parent chain, search if any full
-        # snapshots emitted since parent's base would be a suitable base for an
-        # intermediate snapshot.
+        # snapshots emitted since parent's base would be a suitable base
+        # for an intermediate snapshot.
        #
        # It give a chance to reuse a delta chain unrelated to the current
        # revisions instead of starting our own. Without such re-use,
-        # topological branches would keep reopening new full chains. Creating
-        # more and more snapshot as the repository grow.
-        full = [r for r in snapshot_cache.snapshots[nullrev] if snapfloor <= r]
+        # topological branches would keep reopening new full chains,
+        # creating more and more snapshots as the repository grows.
+        full = [
+            r for r in self.snapshot_cache.snapshots[nullrev] if snapfloor <= r
+        ]
        yield tuple(sorted(full))

-    if not sparse:
-        # other approach failed try against prev to hopefully save us a
-        # fulltext.
-        yield (prev,)
+    def _iter_snapshots(self):
+        assert self.revlog.delta_config.sparse_revlog
+        self.current_stage = _STAGE_SNAPSHOT
+        good = None
+        groups = self._iter_snapshots_base()
+        for candidates in groups:
+            good = yield candidates
+            if good is not None:
+                break
+        # if we have a refinable value, try to refine it
+        if good is not None and good.snapshotdepth is not None:
+            assert self.current_stage == _STAGE_SNAPSHOT
+            # refine snapshot down
+            previous = None
+            while previous != good:
+                previous = good
+                base = self.revlog.deltaparent(good.base)
+                if base == nullrev:
+                    break
+                good = yield (base,)
+            # refine snapshot up
+            if not self.snapshot_cache.snapshots:
+                self.snapshot_cache.update(self.revlog, good.base + 1)
+            previous = None
+            while good != previous:
+                previous = good
+                children = tuple(
+                    sorted(c for c in self.snapshot_cache.snapshots[good.base])
+                )
+                good = yield children
+        yield None
+
+    def _iter_groups(self):
+        good = None
+        for group in self._iter_parents():
+            good = yield group
+            if good is not None:
+                break
+        else:
+            assert good is None
+            assert self.revlog.delta_config.sparse_revlog
+            # If sparse revlog is enabled, we can try to refine the
+            # available deltas
+            iter_snap = self._iter_snapshots()
+            group = iter_snap.send(None)
+            while group is not None:
+                good = yield group
+                group = iter_snap.send(good)
        yield None


 class SnapshotCache:
@@ -1083,6 +1310,12 @@


 class deltacomputer:
+    """object capable of computing and finding deltas for multiple revisions
+
+    This object is meant to compute and find multiple deltas applied to the same
+    revlog.
+    """
+
     def __init__(
         self,
         revlog,
@@ -1142,7 +1375,9 @@

         return delta

-    def _builddeltainfo(self, revinfo, base, target_rev=None):
+    def _builddeltainfo(
+        self, revinfo, base, target_rev=None, as_snapshot=False
+    ):
         # can we use the cached delta?
         revlog = self.revlog
         chainbase = revlog.chainbase(base)
@@ -1160,7 +1395,8 @@
         snapshotdepth = None
         if revlog.delta_config.sparse_revlog and deltabase == nullrev:
             snapshotdepth = 0
-        elif revlog.delta_config.sparse_revlog and revlog.issnapshot(deltabase):
+        elif revlog.delta_config.sparse_revlog and as_snapshot:
+            assert revlog.issnapshot(deltabase)
             # A delta chain should always be one full snapshot,
             # zero or more semi-snapshots, and zero or more deltas
             p1, p2 = revlog.rev(revinfo.p1), revlog.rev(revinfo.p2)
@@ -1273,20 +1509,6 @@
             start = util.timer()
             dbg = self._one_dbg_data()
             dbg['revision'] = target_rev
-            target_revlog = b"UNKNOWN"
-            target_type = self.revlog.target[0]
-            target_key = self.revlog.target[1]
-            if target_type == KIND_CHANGELOG:
-                target_revlog = b'CHANGELOG:'
-            elif target_type == KIND_MANIFESTLOG:
-                target_revlog = b'MANIFESTLOG:'
-                if target_key:
-                    target_revlog += b'%s:' % target_key
-            elif target_type == KIND_FILELOG:
-                target_revlog = b'FILELOG:'
-                if target_key:
-                    target_revlog += b'%s:' % target_key
-            dbg['target-revlog'] = target_revlog
             p1r = revlog.rev(revinfo.p1)
             p2r = revlog.rev(revinfo.p2)
             if p1r != nullrev:
@@ -1383,7 +1605,7 @@
             dbg['delta_try_count'] = 0
             dbg['type'] = b"full"
             if snapshotdepth is None:
-                dbg['snapshot-depth'] = 0
+                dbg['snapshot-depth'] = -1
             else:
                 dbg['snapshot-depth'] = snapshotdepth
             self._dbg_process_data(dbg)
@@ -1404,9 +1626,20 @@
             msg %= target_rev
             self._write_debug(msg)

-        groups = _candidategroups(
+        # should we try to build a delta?
+        if not (len(self.revlog) and self.revlog._storedeltachains):
+            search_cls = _NoDeltaSearch
+        elif self.revlog.delta_config.sparse_revlog:
+            search_cls = _SparseDeltaSearch
+        elif self.revlog.delta_config.general_delta:
+            search_cls = _GeneralDeltaSearch
+        else:
+            # before general delta, there is only one possible delta base
+            search_cls = _PrevDeltaSearch
+
+        search = search_cls(
             self.revlog,
-            revinfo.textlen,
+            revinfo,
             p1r,
             p2r,
             cachedelta,
@@ -1414,8 +1647,13 @@
             target_rev,
             snapshot_cache=self._snapshot_cache,
         )
-        candidaterevs = next(groups)
-        while candidaterevs is not None:
+
+        while not search.done:
+            current_group = search.current_group
+            # current_group can be `None`, but not if search.done is False
+            # We add this assert to help pytype
+            assert current_group is not None
+            candidaterevs = current_group
             dbg_try_rounds += 1
             if self._debug_search:
                 prev = None
@@ -1485,6 +1723,7 @@
                         revinfo,
                         candidaterev,
                         target_rev=target_rev,
+                        as_snapshot=search.current_stage == _STAGE_SNAPSHOT,
                     )
                 if self._debug_search:
                     delta_end = util.timer()
@@ -1492,7 +1731,7 @@
                     msg %= delta_end - delta_start
                     self._write_debug(msg)
                 if candidatedelta is not None:
-                    if is_good_delta_info(self.revlog, candidatedelta, revinfo):
+                    if search.is_good_delta_info(candidatedelta):
                         if self._debug_search:
                             msg = b"DBG-DELTAS-SEARCH: DELTA: length=%d (GOOD)\n"
                             msg %= candidatedelta.deltalen
@@ -1507,10 +1746,7 @@
                         self._write_debug(msg)
             if nominateddeltas:
                 deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
-            if deltainfo is not None:
-                candidaterevs = groups.send(deltainfo.base)
-            else:
-                candidaterevs = next(groups)
+            search.next_group(deltainfo)

         if deltainfo is None:
             dbg_type = b"full"
@@ -1552,12 +1788,12 @@
                         'snapshot-depth'
                     ] = deltainfo.snapshotdepth  # pytype: disable=attribute-error
                 else:
-                    dbg['snapshot-depth'] = 0
+                    dbg['snapshot-depth'] = -1
                 self._dbg_process_data(dbg)
         return deltainfo

     def _one_dbg_data(self):
-        return {
+        dbg = {
             'duration': None,
             'revision': None,
             'delta-base': None,
@@ -1570,6 +1806,21 @@
             'snapshot-depth': None,
             'target-revlog': None,
         }
+        target_revlog = b"UNKNOWN"
+        target_type = self.revlog.target[0]
+        target_key = self.revlog.target[1]
+        if target_type == KIND_CHANGELOG:
+            target_revlog = b'CHANGELOG:'
+        elif target_type == KIND_MANIFESTLOG:
+            target_revlog = b'MANIFESTLOG:'
+            if target_key:
+                target_revlog += b'%s:' % target_key
+        elif target_type == KIND_FILELOG:
+            target_revlog = b'FILELOG:'
+            if target_key:
+                target_revlog += b'%s:' % target_key
+        dbg['target-revlog'] = target_revlog
+        return dbg

     def _dbg_process_data(self, dbg):
         if self._debug_info is not None:
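The refactor above replaces the `_candidategroups` generator protocol with dedicated search classes (`_NoDeltaSearch`, `_PrevDeltaSearch`, `_GeneralDeltaSearch`, `_SparseDeltaSearch`) that the caller drives through `current_group`/`next_group`. A minimal, runnable Python sketch of that driver protocol; `TinySearch` and its stage list are invented for illustration and stand in for the real search objects:

    class TinySearch:
        """Walk candidate-base groups in stages until a good delta is found."""

        def __init__(self, stages):
            # e.g. stages = [(p1, p2), (prev,)]; each stage is a tuple of revs
            self._stages = iter(stages)
            self.current_group = next(self._stages, None)

        @property
        def done(self):
            return self.current_group is None

        def next_group(self, good_delta=None):
            # The real classes refine the search when `good_delta` is not
            # None; this sketch just stops, or moves on to the next stage.
            if good_delta is not None:
                self.current_group = None
            else:
                self.current_group = next(self._stages, None)

    search = TinySearch([(4, 5), (3,)])
    tried = []
    while not search.done:
        group = search.current_group
        tried.append(group)
        # pretend only revision 3 yields an acceptable delta
        best = 3 if 3 in group else None
        search.next_group(best)
    assert tried == [(4, 5), (3,)]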
--- a/mercurial/revlogutils/randomaccessfile.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/revlogutils/randomaccessfile.py Mon Feb 12 16:22:47 2024 +0100 @@ -116,8 +116,6 @@ if initial_cache: self._cached_chunk_position, self._cached_chunk = initial_cache - self._delay_buffer = None - def clear_cache(self): self._cached_chunk = b'' self._cached_chunk_position = 0 @@ -133,12 +131,7 @@ def _open(self, mode=b'r'): """Return a file object""" - if self._delay_buffer is None: - return self.opener(self.filename, mode=mode) - else: - return appender( - self.opener, self.filename, mode, self._delay_buffer - ) + return self.opener(self.filename, mode=mode) @contextlib.contextmanager def _read_handle(self):
--- a/mercurial/revlogutils/rewrite.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/revlogutils/rewrite.py Mon Feb 12 16:22:47 2024 +0100 @@ -51,14 +51,14 @@ ) -def v1_censor(rl, tr, censornode, tombstone=b''): +def v1_censor(rl, tr, censor_nodes, tombstone=b''): """censors a revision in a "version 1" revlog""" assert rl._format_version == constants.REVLOGV1, rl._format_version # avoid cycle from .. import revlog - censorrev = rl.rev(censornode) + censor_revs = set(rl.rev(node) for node in censor_nodes) tombstone = storageutil.packmeta({b'censored': tombstone}, b'') # Rewriting the revlog in place is hard. Our strategy for censoring is @@ -87,14 +87,14 @@ node = rl.node(rev) p1, p2 = rl.parents(node) - if rev == censorrev: + if rev in censor_revs: newrl.addrawrevision( tombstone, tr, - rl.linkrev(censorrev), + rl.linkrev(rev), p1, p2, - censornode, + node, constants.REVIDX_ISCENSORED, ) @@ -138,12 +138,12 @@ rl._load_inner(chunk_cache) -def v2_censor(revlog, tr, censornode, tombstone=b''): +def v2_censor(revlog, tr, censor_nodes, tombstone=b''): """censors a revision in a "version 2" revlog""" assert revlog._format_version != REVLOGV0, revlog._format_version assert revlog._format_version != REVLOGV1, revlog._format_version - censor_revs = {revlog.rev(censornode)} + censor_revs = {revlog.rev(node) for node in censor_nodes} _rewrite_v2(revlog, tr, censor_revs, tombstone)
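With `v1_censor` and `v2_censor` now taking a sequence of nodes, censoring several revisions costs a single rewrite pass; the per-revision test is just set membership. A runnable sketch of that pass, with the revlog faked as a list of `(node, data)` pairs (all names here are invented stand-ins, not the real API):

    # Hypothetical stand-in for a revlog: rev number == list index.
    revlog = [(b'n0', b'a'), (b'n1', b'b'), (b'n2', b'c')]
    censor_nodes = [b'n1']
    node_to_rev = {node: rev for rev, (node, _) in enumerate(revlog)}
    censor_revs = {node_to_rev[node] for node in censor_nodes}

    rewritten = []
    for rev, (node, data) in enumerate(revlog):
        if rev in censor_revs:  # O(1) check, however many nodes are censored
            rewritten.append((node, b'<tombstone>'))
        else:
            rewritten.append((node, data))
    assert rewritten[1] == (b'n1', b'<tombstone>')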
--- a/mercurial/scmposix.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/scmposix.py Mon Feb 12 16:22:47 2024 +0100 @@ -3,6 +3,7 @@ import fcntl import os import sys +import typing from typing import ( List, @@ -15,7 +16,7 @@ util, ) -if pycompat.TYPE_CHECKING: +if typing.TYPE_CHECKING: from . import ui as uimod # BSD 'more' escapes ANSI color sequences by default. This can be disabled by
--- a/mercurial/scmutil.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/scmutil.py Mon Feb 12 16:22:47 2024 +0100 @@ -555,20 +555,23 @@ nodetree = None if cache is not None: nodetree = cache.get(b'disambiguationnodetree') + is_invalidated = getattr(nodetree, 'is_invalidated', lambda: False) + if is_invalidated(): + nodetree = None if not nodetree: - if hasattr(parsers, 'nodetree'): - # The CExt is the only implementation to provide a nodetree - # class so far. + if hasattr(parsers, 'nodetree') and isinstance( + cl.index, parsers.index + ): index = cl.index - if hasattr(index, 'get_cindex'): - # the rust wrapped need to give access to its internal index - index = index.get_cindex() nodetree = parsers.nodetree(index, len(revs)) - for r in revs: - nodetree.insert(r) - if cache is not None: - cache[b'disambiguationnodetree'] = nodetree + elif getattr(cl.index, 'is_rust', False): + nodetree = rustrevlog.NodeTree(cl.index) + if nodetree is not None: + for r in revs: + nodetree.insert(r) + if cache is not None: + cache[b'disambiguationnodetree'] = nodetree length = max(nodetree.shortest(node), minlength) prefix = hexnode[:length] return disambiguate(prefix) @@ -2322,3 +2325,34 @@ schemes. """ return userlist == [b'*'] or username in userlist + + +RESOURCE_HIGH = 3 +RESOURCE_MEDIUM = 2 +RESOURCE_LOW = 1 +RESOURCE_DEFAULT = 0 + +RESOURCE_MAPPING = { + b'default': RESOURCE_DEFAULT, + b'low': RESOURCE_LOW, + b'medium': RESOURCE_MEDIUM, + b'high': RESOURCE_HIGH, +} + +DEFAULT_RESOURCE = RESOURCE_MEDIUM + + +def get_resource_profile(ui, dimension=None): + """return the resource profile for a dimension + + If no dimension is specified, the generic value is returned""" + generic_name = ui.config(b'usage', b'resources') + value = RESOURCE_MAPPING.get(generic_name, RESOURCE_DEFAULT) + if value == RESOURCE_DEFAULT: + value = DEFAULT_RESOURCE + if dimension is not None: + sub_name = ui.config(b'usage', b'resources.%s' % dimension) + sub_value = RESOURCE_MAPPING.get(sub_name, RESOURCE_DEFAULT) + if sub_value != RESOURCE_DEFAULT: + value = sub_value + return value
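The new `get_resource_profile` reads a generic `usage.resources` config value and lets a dimension-specific `usage.resources.<dimension>` entry override it. A runnable sketch of that lookup order, with the `ui` object reduced to a plain dict (the helper name and dict-based config are simplifications, not the real API):

    RESOURCE_MAPPING = {b'default': 0, b'low': 1, b'medium': 2, b'high': 3}

    def get_profile(config, dimension=None):
        value = RESOURCE_MAPPING.get(config.get(b'resources'), 0)
        if value == 0:
            value = 2  # DEFAULT_RESOURCE is "medium"
        if dimension is not None:
            sub = RESOURCE_MAPPING.get(
                config.get(b'resources.%s' % dimension), 0
            )
            if sub != 0:
                value = sub
        return value

    cfg = {b'resources': b'low', b'resources.memory': b'high'}
    assert get_profile(cfg) == 1             # generic value: low
    assert get_profile(cfg, b'memory') == 3  # dimension override: high
    assert get_profile(cfg, b'cpu') == 1     # no override: generic wins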
--- a/mercurial/scmwindows.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/scmwindows.py Mon Feb 12 16:22:47 2024 +0100 @@ -3,6 +3,7 @@ from typing import ( List, + TYPE_CHECKING, Tuple, ) @@ -13,7 +14,7 @@ win32, ) -if pycompat.TYPE_CHECKING: +if TYPE_CHECKING: from . import ui as uimod # MS-DOS 'more' is the only pager available by default on Windows.
--- a/mercurial/sslutil.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/sslutil.py Mon Feb 12 16:22:47 2024 +0100 @@ -787,7 +787,7 @@ # The "certifi" Python package provides certificates. If it is installed # and usable, assume the user intends it to be used and use it. try: - import certifi + import certifi # pytype: disable=import-error certs = certifi.where() if os.path.exists(certs):
--- a/mercurial/state.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/state.py Mon Feb 12 16:22:47 2024 +0100 @@ -20,23 +20,22 @@ import contextlib +from typing import ( + Any, + Dict, +) + from .i18n import _ from . import ( error, - pycompat, util, ) from .utils import cborutil -if pycompat.TYPE_CHECKING: - from typing import ( - Any, - Dict, - ) - - for t in (Any, Dict): - assert t +# keeps pyflakes happy +for t in (Any, Dict): + assert t class cmdstate: @@ -60,8 +59,7 @@ self._repo = repo self.fname = fname - def read(self): - # type: () -> Dict[bytes, Any] + def read(self) -> Dict[bytes, Any]: """read the existing state file and return a dict of data stored""" return self._read()
--- a/mercurial/statichttprepo.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/statichttprepo.py Mon Feb 12 16:22:47 2024 +0100 @@ -243,7 +243,7 @@ def wlock(self, wait=True): raise error.LockUnavailable( 0, - _(b'lock not available'), + pycompat.sysstr(_(b'lock not available')), b'lock', _(b'cannot lock static-http repository'), ) @@ -251,7 +251,7 @@ def lock(self, wait=True): raise error.LockUnavailable( 0, - _(b'lock not available'), + pycompat.sysstr(_(b'lock not available')), b'lock', _(b'cannot lock static-http repository'), )
--- a/mercurial/statprof.py Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/statprof.py Mon Feb 12 16:22:47 2024 +0100
@@ -384,7 +384,7 @@
         time = sample.time
         stack = sample.stack
         sites = [
-            b'\1'.join([s.path, b'%d' % s.lineno, s.function])
+            b'\1'.join([s.path, b'%d' % (s.lineno or -1), s.function])
             for s in stack
         ]
         file.write(b"%d\0%s\n" % (time, b'\0'.join(sites)))
@@ -663,7 +663,7 @@
                     count / relevant_samples * 100,
                     pycompat.fsencode(parent.filename()),
                     pycompat.sysbytes(parent.function),
-                    parent.lineno,
+                    parent.lineno or -1,
                     pycompat.sysbytes(parent.getsource(50)),
                 )
             )
@@ -705,7 +705,7 @@
                 b' %6.2f%% line %s: %s\n'
                 % (
                     count / relevant_samples * 100,
-                    child.lineno,
+                    child.lineno or -1,
                     pycompat.sysbytes(child.getsource(50)),
                 )
             )
@@ -865,7 +865,7 @@
             stack.append(
                 (
                     pycompat.sysstr(frame.path),
-                    frame.lineno,
+                    frame.lineno or -1,
                     pycompat.sysstr(frame.function),
                 )
             )
@@ -954,7 +954,10 @@
                 (
                     (
                         '%s:%d'
-                        % (simplifypath(pycompat.sysstr(frame.path)), frame.lineno),
+                        % (
+                            simplifypath(pycompat.sysstr(frame.path)),
+                            frame.lineno or -1,
+                        ),
                         pycompat.sysstr(frame.function),
                     )
                     for frame in sample.stack
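These statprof hunks all normalize a missing `lineno` to -1 before formatting. In the `%d` case the fallback must be parenthesized, because `%` binds tighter than `or`; a runnable two-line check:

    lineno = None
    # normalizes a missing line number to -1 before formatting
    assert b'%d' % (lineno or -1) == b'-1'
    # b'%d' % lineno or -1 would instead raise TypeError, since the
    # `%` formatting runs before `or` gets a chance to apply.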
--- a/mercurial/subrepoutil.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/subrepoutil.py Mon Feb 12 16:22:47 2024 +0100 @@ -9,6 +9,16 @@ import os import posixpath import re +import typing + +from typing import ( + Any, + Dict, + List, + Optional, + Set, + Tuple, +) from .i18n import _ from . import ( @@ -17,7 +27,6 @@ filemerge, pathutil, phases, - pycompat, util, ) from .utils import ( @@ -25,17 +34,19 @@ urlutil, ) +# keeps pyflakes happy +assert [ + Any, + Dict, + List, + Optional, + Set, + Tuple, +] + nullstate = (b'', b'', b'empty') -if pycompat.TYPE_CHECKING: - from typing import ( - Any, - Dict, - List, - Optional, - Set, - Tuple, - ) +if typing.TYPE_CHECKING: from . import ( context, localrepo, @@ -45,16 +56,25 @@ ui as uimod, ) - Substate = Dict[bytes, Tuple[bytes, bytes, bytes]] + # keeps pyflakes happy + assert [ + context, + localrepo, + matchmod, + scmutil, + subrepo, + uimod, + ] + +Substate = Dict[bytes, Tuple[bytes, bytes, bytes]] -def state(ctx, ui): - # type: (context.changectx, uimod.ui) -> Substate +def state(ctx: "context.changectx", ui: "uimod.ui") -> Substate: """return a state dict, mapping subrepo paths configured in .hgsub to tuple: (source from .hgsub, revision from .hgsubstate, kind (key in types dict)) """ - p = config.config() + p: config.config = config.config() repo = ctx.repo() def read(f, sections=None, remap=None): @@ -101,8 +121,7 @@ except FileNotFoundError: pass - def remap(src): - # type: (bytes) -> bytes + def remap(src: bytes) -> bytes: for pattern, repl in p.items(b'subpaths'): # Turn r'C:\foo\bar' into r'C:\\foo\\bar' since re.sub # does a string decode. @@ -124,7 +143,7 @@ return src state = {} - for path, src in p.items(b''): # type: bytes + for path, src in p.items(b''): kind = b'hg' if src.startswith(b'['): if b']' not in src: @@ -154,8 +173,7 @@ return state -def writestate(repo, state): - # type: (localrepo.localrepository, Substate) -> None +def writestate(repo: "localrepo.localrepository", state: Substate) -> None: """rewrite .hgsubstate in (outer) repo with these subrepo states""" lines = [ b'%s %s\n' % (state[s][1], s) @@ -165,8 +183,14 @@ repo.wwrite(b'.hgsubstate', b''.join(lines), b'') -def submerge(repo, wctx, mctx, actx, overwrite, labels=None): - # type: (localrepo.localrepository, context.workingctx, context.changectx, context.changectx, bool, Optional[Any]) -> Substate +def submerge( + repo: "localrepo.localrepository", + wctx: "context.workingctx", + mctx: "context.changectx", + actx: "context.changectx", + overwrite: bool, + labels: Optional[Any] = None, +) -> Substate: # TODO: type the `labels` arg """delegated from merge.applyupdates: merging of .hgsubstate file in working context, merging context and ancestor context""" @@ -306,8 +330,13 @@ return sm -def precommit(ui, wctx, status, match, force=False): - # type: (uimod.ui, context.workingcommitctx, scmutil.status, matchmod.basematcher, bool) -> Tuple[List[bytes], Set[bytes], Substate] +def precommit( + ui: "uimod.ui", + wctx: "context.workingcommitctx", + status: "scmutil.status", + match: "matchmod.basematcher", + force: bool = False, +) -> Tuple[List[bytes], Set[bytes], Substate]: """Calculate .hgsubstate changes that should be applied before committing Returns (subs, commitsubs, newstate) where @@ -395,8 +424,7 @@ return posixpath.normpath(path) -def reporelpath(repo): - # type: (localrepo.localrepository) -> bytes +def reporelpath(repo: "localrepo.localrepository") -> bytes: """return path to this (sub)repo as seen from outermost repo""" parent = repo while 
hasattr(parent, '_subparent'): @@ -404,14 +432,16 @@ return repo.root[len(pathutil.normasprefix(parent.root)) :] -def subrelpath(sub): - # type: (subrepo.abstractsubrepo) -> bytes +def subrelpath(sub: "subrepo.abstractsubrepo") -> bytes: """return path to this subrepo as seen from outermost repo""" return sub._relpath -def _abssource(repo, push=False, abort=True): - # type: (localrepo.localrepository, bool, bool) -> Optional[bytes] +def _abssource( + repo: "localrepo.localrepository", + push: bool = False, + abort: bool = True, +) -> Optional[bytes]: """return pull/push path of repo - either based on parent repo .hgsub info or on the top repo config. Abort or return None if no source found.""" if hasattr(repo, '_subparent'): @@ -459,8 +489,7 @@ raise error.Abort(_(b"default path for subrepository not found")) -def newcommitphase(ui, ctx): - # type: (uimod.ui, context.changectx) -> int +def newcommitphase(ui: "uimod.ui", ctx: "context.changectx") -> int: commitphase = phases.newcommitphase(ui) substate = getattr(ctx, "substate", None) if not substate:
--- a/mercurial/templatekw.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/templatekw.py Mon Feb 12 16:22:47 2024 +0100 @@ -270,7 +270,7 @@ ui = context.resource(mapping, b'ui') ctx = context.resource(mapping, b'ctx') diffopts = diffutil.diffallopts(ui, {b'noprefix': False}) - diff = ctx.diff(opts=diffopts) + diff = ctx.diff(diffutil.diff_parent(ctx), opts=diffopts) stats = patch.diffstatdata(util.iterlines(diff)) maxname, maxtotal, adds, removes, binary = patch.diffstatsum(stats) return b'%d: +%d/-%d' % (len(stats), adds, removes)
--- a/mercurial/testing/revlog.py Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/testing/revlog.py Mon Feb 12 16:22:47 2024 +0100
@@ -21,17 +21,42 @@
     b'\x00\x00\x00\x00\x00\x00\x00\x00\x00'
 )

+from ..revlogutils.constants import REVLOGV1
+
 try:
     from ..cext import parsers as cparsers  # pytype: disable=import-error
 except ImportError:
     cparsers = None

+try:
+    from ..rustext.revlog import (  # pytype: disable=import-error
+        Index as RustIndex,
+    )
+except ImportError:
+    RustIndex = None
+

 @unittest.skipIf(
     cparsers is None,
     'The C version of the "parsers" module is not available. It is needed for this test.',
 )
 class RevlogBasedTestBase(unittest.TestCase):
-    def parseindex(self):
-        return cparsers.parse_index2(data_non_inlined, False)[0]
+    def parseindex(self, data=None):
+        if data is None:
+            data = data_non_inlined
+        return cparsers.parse_index2(data, False)[0]
+
+
+@unittest.skipIf(
+    RustIndex is None,
+    'The Rust index is not available. It is needed for this test.',
+)
+class RustRevlogBasedTestBase(unittest.TestCase):
+    def parserustindex(self, data=None):
+        if data is None:
+            data = data_non_inlined
+        # not inheriting RevlogBasedTestBase to avoid having a
+        # `parseindex` method that would be shadowed by future subclasses;
+        # this duplication will soon be removed
+        return RustIndex(data, REVLOGV1)
--- a/mercurial/testing/storage.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/testing/storage.py Mon Feb 12 16:22:47 2024 +0100 @@ -1280,7 +1280,7 @@ node2 = f.add(b'foo\n' * 32, None, tr, 2, node1, f.nullid) with self._maketransactionfn() as tr: - f.censorrevision(tr, node1) + f.censorrevision(tr, [node1]) self.assertEqual(len(f), 3) self.assertEqual(list(f.revs()), [0, 1, 2])
--- a/mercurial/ui.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/ui.py Mon Feb 12 16:22:47 2024 +0100 @@ -18,6 +18,7 @@ import subprocess import sys import traceback +import typing from typing import ( Any, @@ -1766,7 +1767,7 @@ return line - if pycompat.TYPE_CHECKING: + if typing.TYPE_CHECKING: @overload def prompt(self, msg: bytes, default: bytes) -> bytes: @@ -1782,7 +1783,7 @@ """ return self._prompt(msg, default=default) - if pycompat.TYPE_CHECKING: + if typing.TYPE_CHECKING: @overload def _prompt(
--- a/mercurial/upgrade_utils/actions.py Mon Feb 12 16:17:08 2024 +0100
+++ b/mercurial/upgrade_utils/actions.py Mon Feb 12 16:22:47 2024 +0100
@@ -5,12 +5,17 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

+import random
+
+from typing import (
+    List,
+    Type,
+)

 from ..i18n import _
 from .. import (
     error,
     localrepo,
-    pycompat,
     requirements,
     revlog,
     util,
@@ -18,12 +23,11 @@

 from ..utils import compression

-if pycompat.TYPE_CHECKING:
-    from typing import (
-        List,
-        Type,
-    )
-
+# keeps pyflakes happy
+assert [
+    List,
+    Type,
+]

 # list of requirements that request a clone of all revlog if added/removed
 RECLONES_REQUIREMENTS = {
@@ -104,7 +108,7 @@
     compatible_with_share = False


-allformatvariant = []  # type: List[Type['formatvariant']]
+allformatvariant: List[Type['formatvariant']] = []


 def registerformatvariant(cls):
@@ -409,9 +413,17 @@
     def fromrepo(repo):
         # Mercurial 4.0 changed changelogs to not use delta chains. Search for
         # changelogs with deltas.
-        cl = repo.changelog
+        cl = repo.unfiltered().changelog
+        if len(cl) <= 1000:
+            some_rev = list(cl)
+        else:
+            # do a random sampling to speed things up. Scanning the whole
+            # repository can get really slow on bigger repos.
+            some_rev = sorted(
+                {random.randint(0, len(cl) - 1) for x in range(1000)}
+            )
         chainbase = cl.chainbase
-        return all(rev == chainbase(rev) for rev in cl)
+        return all(rev == chainbase(rev) for rev in some_rev)

     @staticmethod
     def fromconfig(repo):
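The `fromrepo` check above now probes at most 1000 random revisions instead of the whole changelog. A runnable sketch of the sampling (the 50000 is an arbitrary stand-in for a large `len(cl)`):

    import random

    num_revs = 50000  # arbitrary stand-in for len(cl)
    if num_revs <= 1000:
        some_rev = list(range(num_revs))
    else:
        # a set keeps the sample free of duplicates, so at most 1000 probes
        some_rev = sorted(
            {random.randint(0, num_revs - 1) for _ in range(1000)}
        )

    assert len(some_rev) <= 1000
    assert all(0 <= r < num_revs for r in some_rev)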
--- a/mercurial/util.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/util.py Mon Feb 12 16:22:47 2024 +0100 @@ -34,6 +34,14 @@ import traceback import warnings +from typing import ( + Iterable, + Iterator, + List, + Optional, + Tuple, +) + from .node import hex from .thirdparty import attr from .pycompat import ( @@ -55,14 +63,14 @@ stringutil, ) -if pycompat.TYPE_CHECKING: - from typing import ( - Iterable, - Iterator, - List, - Optional, - Tuple, - ) +# keeps pyflakes happy +assert [ + Iterable, + Iterator, + List, + Optional, + Tuple, +] base85 = policy.importmod('base85') @@ -139,8 +147,7 @@ username = platform.username -def setumask(val): - # type: (int) -> None +def setumask(val: int) -> None: '''updates the umask. used by chg server''' if pycompat.iswindows: return @@ -1520,7 +1527,6 @@ raise return default - assert node is not None # help pytype value = node.value self.totalcost -= node.cost node.markempty() @@ -1548,7 +1554,6 @@ """ try: node = self._cache[k] - assert node is not None # help pytype return node.value except KeyError: if default is _notset: @@ -1607,13 +1612,9 @@ # a non-empty node. n = self._head.prev - assert n is not None # help pytype - while n.key is _notset: n = n.prev - assert n is not None # help pytype - key, value = n.key, n.value # And remove it from the cache and mark it as empty. @@ -1623,7 +1624,7 @@ return key, value - def _movetohead(self, node): + def _movetohead(self, node: _lrucachenode): """Mark a node as the newest, making it the new head. When a node is accessed, it becomes the freshest entry in the LRU @@ -1670,7 +1671,7 @@ self._head = node - def _addcapacity(self): + def _addcapacity(self) -> _lrucachenode: """Add a node to the circular linked list. The new node is inserted before the head node. @@ -1842,8 +1843,7 @@ nogc = lambda x: x -def pathto(root, n1, n2): - # type: (bytes, bytes, bytes) -> bytes +def pathto(root: bytes, n1: bytes, n2: bytes) -> bytes: """return the relative path from one place to another. root should use os.sep to separate directories n1 should use os.sep to separate directories @@ -2054,8 +2054,7 @@ _winreservedchars = b':*?"<>|' -def checkwinfilename(path): - # type: (bytes) -> Optional[bytes] +def checkwinfilename(path: bytes) -> Optional[bytes]: r"""Check that the base-relative path is a valid filename on Windows. Returns None if the path is ok, or a UI string describing the problem. @@ -2121,7 +2120,7 @@ if pycompat.iswindows: checkosfilename = checkwinfilename if not timer: - timer = time.clock + timer = time.clock # pytype: disable=module-attr else: # mercurial.windows doesn't have platform.checkosfilename checkosfilename = platform.checkosfilename # pytype: disable=module-attr @@ -2149,8 +2148,7 @@ os.close(ld) -def readlock(pathname): - # type: (bytes) -> bytes +def readlock(pathname: bytes) -> bytes: try: return readlink(pathname) except OSError as why: @@ -2173,8 +2171,7 @@ # File system features -def fscasesensitive(path): - # type: (bytes) -> bool +def fscasesensitive(path: bytes) -> bool: """ Return true if the given path is on a case-sensitive filesystem @@ -2278,8 +2275,7 @@ _fspathcache = {} -def fspath(name, root): - # type: (bytes, bytes) -> bytes +def fspath(name: bytes, root: bytes) -> bytes: """Get name in the case stored in the filesystem The name should be relative to root, and be normcase-ed for efficiency. 
@@ -2323,8 +2319,7 @@ return b''.join(result) -def checknlink(testfile): - # type: (bytes) -> bool +def checknlink(testfile: bytes) -> bool: '''check whether hardlink count reporting works properly''' # testfile may be open, so we need a separate file for checking to @@ -2357,8 +2352,7 @@ pass -def endswithsep(path): - # type: (bytes) -> bool +def endswithsep(path: bytes) -> bool: '''Check path ends with os.sep or os.altsep.''' return bool( # help pytype path.endswith(pycompat.ossep) @@ -2367,8 +2361,7 @@ ) -def splitpath(path): - # type: (bytes) -> List[bytes] +def splitpath(path: bytes) -> List[bytes]: """Split path by os.sep. Note that this function does not use os.altsep because this is an alternative of simple "xxx.split(os.sep)". @@ -2601,8 +2594,9 @@ raise -def unlinkpath(f, ignoremissing=False, rmdir=True): - # type: (bytes, bool, bool) -> None +def unlinkpath( + f: bytes, ignoremissing: bool = False, rmdir: bool = True +) -> None: """unlink and remove the directory if it is empty""" if ignoremissing: tryunlink(f) @@ -2616,8 +2610,7 @@ pass -def tryunlink(f): - # type: (bytes) -> None +def tryunlink(f: bytes) -> None: """Attempt to remove a file, ignoring FileNotFoundError.""" try: unlink(f) @@ -2625,8 +2618,9 @@ pass -def makedirs(name, mode=None, notindexed=False): - # type: (bytes, Optional[int], bool) -> None +def makedirs( + name: bytes, mode: Optional[int] = None, notindexed: bool = False +) -> None: """recursive directory creation with parent mode inheritance Newly created directories are marked as "not to be indexed by @@ -2655,20 +2649,17 @@ os.chmod(name, mode) -def readfile(path): - # type: (bytes) -> bytes +def readfile(path: bytes) -> bytes: with open(path, b'rb') as fp: return fp.read() -def writefile(path, text): - # type: (bytes, bytes) -> None +def writefile(path: bytes, text: bytes) -> None: with open(path, b'wb') as fp: fp.write(text) -def appendfile(path, text): - # type: (bytes, bytes) -> None +def appendfile(path: bytes, text: bytes) -> None: with open(path, b'ab') as fp: fp.write(text) @@ -2829,8 +2820,7 @@ return go -def processlinerange(fromline, toline): - # type: (int, int) -> Tuple[int, int] +def processlinerange(fromline: int, toline: int) -> Tuple[int, int]: """Check that linerange <fromline>:<toline> makes sense and return a 0-based range. @@ -2889,13 +2879,11 @@ _eolre = remod.compile(br'\r*\n') -def tolf(s): - # type: (bytes) -> bytes +def tolf(s: bytes) -> bytes: return _eolre.sub(b'\n', s) -def tocrlf(s): - # type: (bytes) -> bytes +def tocrlf(s: bytes) -> bytes: return _eolre.sub(b'\r\n', s) @@ -2918,15 +2906,13 @@ return fp -def iterlines(iterator): - # type: (Iterable[bytes]) -> Iterator[bytes] +def iterlines(iterator: Iterable[bytes]) -> Iterator[bytes]: for chunk in iterator: for line in chunk.splitlines(): yield line -def expandpath(path): - # type: (bytes) -> bytes +def expandpath(path: bytes) -> bytes: return os.path.expanduser(os.path.expandvars(path)) @@ -3054,8 +3040,7 @@ ) -def sizetoint(s): - # type: (bytes) -> int +def sizetoint(s: bytes) -> int: """Convert a space specifier to a byte count. >>> sizetoint(b'30') @@ -3277,8 +3262,7 @@ yield -def _estimatememory(): - # type: () -> Optional[int] +def _estimatememory() -> Optional[int]: """Provide an estimate for the available system memory in Bytes. If no estimate can be provided on the platform, returns None.
--- a/mercurial/utils/dateutil.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/utils/dateutil.py Mon Feb 12 16:22:47 2024 +0100 @@ -10,6 +10,15 @@ import datetime import time +from typing import ( + Callable, + Dict, + Iterable, + Optional, + Tuple, + Union, +) + from ..i18n import _ from .. import ( encoding, @@ -17,17 +26,17 @@ pycompat, ) -if pycompat.TYPE_CHECKING: - from typing import ( - Callable, - Dict, - Iterable, - Optional, - Tuple, - Union, - ) +# keeps pyflakes happy +assert [ + Callable, + Dict, + Iterable, + Optional, + Tuple, + Union, +] - hgdate = Tuple[float, int] # (unixtime, offset) +hgdate = Tuple[float, int] # (unixtime, offset) # used by parsedate defaultdateformats = ( @@ -72,8 +81,7 @@ ) -def makedate(timestamp=None): - # type: (Optional[float]) -> hgdate +def makedate(timestamp: Optional[float] = None) -> hgdate: """Return a unix timestamp (or the current time) as a (unixtime, offset) tuple based off the local timezone.""" if timestamp is None: @@ -94,8 +102,10 @@ return timestamp, tz -def datestr(date=None, format=b'%a %b %d %H:%M:%S %Y %1%2'): - # type: (Optional[hgdate], bytes) -> bytes +def datestr( + date: Optional[hgdate] = None, + format: bytes = b'%a %b %d %H:%M:%S %Y %1%2', +) -> bytes: """represent a (unixtime, offset) tuple as a localized time. unixtime is seconds since the epoch, and offset is the time zone's number of seconds away from UTC. @@ -132,14 +142,12 @@ return s -def shortdate(date=None): - # type: (Optional[hgdate]) -> bytes +def shortdate(date: Optional[hgdate] = None) -> bytes: """turn (timestamp, tzoff) tuple into iso 8631 date.""" return datestr(date, format=b'%Y-%m-%d') -def parsetimezone(s): - # type: (bytes) -> Tuple[Optional[int], bytes] +def parsetimezone(s: bytes) -> Tuple[Optional[int], bytes]: """find a trailing timezone, if any, in string, and return a (offset, remainder) pair""" s = pycompat.bytestr(s) @@ -174,8 +182,11 @@ return None, s -def strdate(string, format, defaults=None): - # type: (bytes, bytes, Optional[Dict[bytes, Tuple[bytes, bytes]]]) -> hgdate +def strdate( + string: bytes, + format: bytes, + defaults: Optional[Dict[bytes, Tuple[bytes, bytes]]] = None, +) -> hgdate: """parse a localized time string and return a (unixtime, offset) tuple. if the string cannot be parsed, ValueError is raised.""" if defaults is None: @@ -217,8 +228,11 @@ return unixtime, offset -def parsedate(date, formats=None, bias=None): - # type: (Union[bytes, hgdate], Optional[Iterable[bytes]], Optional[Dict[bytes, bytes]]) -> hgdate +def parsedate( + date: Union[bytes, hgdate], + formats: Optional[Iterable[bytes]] = None, + bias: Optional[Dict[bytes, bytes]] = None, +) -> hgdate: """parse a localized date/time and return a (unixtime, offset) tuple. The date may be a "unixtime offset" string or in one of the specified @@ -307,8 +321,7 @@ return when, offset -def matchdate(date): - # type: (bytes) -> Callable[[float], bool] +def matchdate(date: bytes) -> Callable[[float], bool]: """Return a function that matches a given date match specifier Formats include: @@ -337,13 +350,11 @@ False """ - def lower(date): - # type: (bytes) -> float + def lower(date: bytes) -> float: d = {b'mb': b"1", b'd': b"1"} return parsedate(date, extendeddateformats, d)[0] - def upper(date): - # type: (bytes) -> float + def upper(date: bytes) -> float: d = {b'mb': b"12", b'HI': b"23", b'M': b"59", b'S': b"59"} for days in (b"31", b"30", b"29"): try:
--- a/mercurial/utils/urlutil.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/utils/urlutil.py Mon Feb 12 16:22:47 2024 +0100 @@ -8,13 +8,16 @@ import re as remod import socket +from typing import ( + Union, +) + from ..i18n import _ from .. import ( encoding, error, pycompat, urllibcompat, - util, ) from . import ( @@ -25,17 +28,13 @@ constants as revlog_constants, ) - -if pycompat.TYPE_CHECKING: - from typing import ( - Union, - ) +# keeps pyflakes happy +assert [Union] urlreq = urllibcompat.urlreq -def getport(port): - # type: (Union[bytes, int]) -> int +def getport(port: Union[bytes, int]) -> int: """Return the port for a given network service. If port is an integer, it's returned as is. If it's a string, it's @@ -133,8 +132,12 @@ _safepchars = b"/!~*'()+:\\" _matchscheme = remod.compile(b'^[a-zA-Z0-9+.\\-]+:').match - def __init__(self, path, parsequery=True, parsefragment=True): - # type: (bytes, bool, bool) -> None + def __init__( + self, + path: bytes, + parsequery: bool = True, + parsefragment: bool = True, + ) -> None: # We slowly chomp away at path until we have only the path left self.scheme = self.user = self.passwd = self.host = None self.port = self.path = self.query = self.fragment = None @@ -378,8 +381,7 @@ return True # POSIX-style return False - def localpath(self): - # type: () -> bytes + def localpath(self) -> bytes: if self.scheme == b'file' or self.scheme == b'bundle': path = self.path or b'/' # For Windows, we need to promote hosts containing drive @@ -402,23 +404,19 @@ ) -def hasscheme(path): - # type: (bytes) -> bool +def hasscheme(path: bytes) -> bool: return bool(url(path).scheme) # cast to help pytype -def hasdriveletter(path): - # type: (bytes) -> bool +def hasdriveletter(path: bytes) -> bool: return bool(path) and path[1:2] == b':' and path[0:1].isalpha() -def urllocalpath(path): - # type: (bytes) -> bytes +def urllocalpath(path: bytes) -> bytes: return url(path, parsequery=False, parsefragment=False).localpath() -def checksafessh(path): - # type: (bytes) -> None +def checksafessh(path: bytes) -> None: """check if a path / url is a potentially unsafe ssh exploit (SEC) This is a sanity check for ssh urls. ssh will parse the first item as @@ -435,8 +433,7 @@ ) -def hidepassword(u): - # type: (bytes) -> bytes +def hidepassword(u: bytes) -> bytes: '''hide user credential in a url string''' u = url(u) if u.passwd: @@ -444,8 +441,7 @@ return bytes(u) -def removeauth(u): - # type: (bytes) -> bytes +def removeauth(u: bytes) -> bytes: '''remove all authentication information from a url string''' u = url(u) u.user = u.passwd = None @@ -680,8 +676,7 @@ """ if isinstance(attr, bytes): msg = b'pathsuboption take `str` as "attr" argument, not `bytes`' - util.nouideprecwarn(msg, b"6.6", stacklevel=2) - attr = attr.decode('ascii') + raise TypeError(msg) def register(func): _pathsuboptions[option] = (attr, func) @@ -923,14 +918,6 @@ new._setup_url(self._pushloc) return new - def pushloc(self): - """compatibility layer for the deprecated attributes""" - from .. import util # avoid a cycle - - msg = "don't use path.pushloc, use path.get_push_variant()" - util.nouideprecwarn(msg, b"6.5") - return self._pushloc - def _validate_path(self): # When given a raw location but not a symbolic name, validate the # location is valid.
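`pathsuboption` now rejects a bytes attribute name outright instead of emitting a deprecation warning and decoding it. A minimal sketch of the stricter decorator; only the type check mirrors the hunk above, the rest of the registry body is simplified:

    _pathsuboptions = {}

    def pathsuboption(option, attr):
        if isinstance(attr, bytes):
            msg = b'pathsuboption take `str` as "attr" argument, not `bytes`'
            raise TypeError(msg)

        def register(func):
            _pathsuboptions[option] = (attr, func)
            return func

        return register

    @pathsuboption(b'pushurl', 'raw_pushurl')  # str attr: accepted
    def push_url(ui, path, value):
        return value

    # pathsuboption(b'pushurl', b'raw_pushurl') would now raise TypeError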
--- a/mercurial/windows.py Mon Feb 12 16:17:08 2024 +0100 +++ b/mercurial/windows.py Mon Feb 12 16:22:47 2024 +0100 @@ -61,13 +61,7 @@ unlink = win32.unlink if typing.TYPE_CHECKING: - # Replace the various overloads that come along with aliasing stdlib methods - # with the narrow definition that we care about in the type checking phase - # only. This ensures that both Windows and POSIX see only the definition - # that is actually available. - # - # Note that if we check pycompat.TYPE_CHECKING here, it is always False, and - # the methods aren't replaced. + def split(p: bytes) -> Tuple[bytes, bytes]: raise NotImplementedError
--- a/rust/Cargo.lock Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/Cargo.lock Mon Feb 12 16:22:47 2024 +0100 @@ -70,6 +70,18 @@ ] [[package]] +name = "bitvec" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bc2832c24239b0141d5674bb9174f9d68a8b5b3f2753311927c172ca46f7e9c" +dependencies = [ + "funty", + "radium", + "tap", + "wyz", +] + +[[package]] name = "block-buffer" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -443,6 +455,12 @@ ] [[package]] +name = "funty" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6d5a32815ae3f33302d95fdcb2ce17862f8c65363dcfd29360480ba1001fc9c" + +[[package]] name = "generic-array" version = "0.14.6" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -516,6 +534,7 @@ version = "0.1.0" dependencies = [ "bitflags", + "bitvec", "byteorder", "bytes-cast", "clap", @@ -915,6 +934,12 @@ ] [[package]] +name = "radium" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" + +[[package]] name = "rand" version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1226,6 +1251,12 @@ ] [[package]] +name = "tap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" + +[[package]] name = "tempfile" version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -1489,6 +1520,15 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" [[package]] +name = "wyz" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f360fc0b24296329c78fda852a1e9ae82de9cf7b27dae4b7f62f118f77b9ed" +dependencies = [ + "tap", +] + +[[package]] name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index"
--- a/rust/hg-core/Cargo.toml Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/Cargo.toml Mon Feb 12 16:22:47 2024 +0100 @@ -39,6 +39,7 @@ zstd = "0.12" format-bytes = "0.3.0" once_cell = "1.16.0" +bitvec = "1.0.1" # We don't use the `miniz-oxide` backend to not change rhg benchmarks and until # we have a clearer view of which backend is the fastest.
--- a/rust/hg-core/src/dagops.rs Mon Feb 12 16:17:08 2024 +0100
+++ b/rust/hg-core/src/dagops.rs Mon Feb 12 16:22:47 2024 +0100
@@ -12,8 +12,10 @@
 //!   mean those revisions that have no children among the collection.
 //! - Similarly *relative roots* of a collection of `Revision`, we mean those
 //!   whose parents, if any, don't belong to the collection.
+use bitvec::slice::BitSlice;
+
 use super::{Graph, GraphError, Revision, NULL_REVISION};
-use crate::ancestors::AncestorsIterator;
+use crate::{ancestors::AncestorsIterator, BaseRevision};
 use std::collections::{BTreeSet, HashSet};

 fn remove_parents<S: std::hash::BuildHasher>(
@@ -81,6 +83,32 @@
     Ok(())
 }

+/// Optimized version of `retain_heads` that expects a zeroed bitvec of the
+/// size of the graph, to act as a faster but less space-efficient `HashSet`.
+///
+/// # Panics
+///
+/// Can panic if `not_heads` is shorter than the length of the graph.
+pub fn retain_heads_fast(
+    graph: &impl Graph,
+    not_heads: &mut BitSlice,
+    filtered_revs: &HashSet<Revision>,
+) -> Result<(), GraphError> {
+    for idx in (0..not_heads.len()).rev() {
+        let rev = Revision(idx as BaseRevision);
+        if !not_heads[idx] && filtered_revs.contains(&rev) {
+            not_heads.get_mut(idx).unwrap().commit(true);
+            continue;
+        }
+        for parent in graph.parents(rev)?.iter() {
+            if *parent != NULL_REVISION {
+                not_heads.get_mut(parent.0 as usize).unwrap().commit(true);
+            }
+        }
+    }
+    Ok(())
+}
+
 /// Roots of `revs`, passed as a `HashSet`
 ///
 /// They are returned in arbitrary order
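`retain_heads_fast` makes a single reverse pass over the graph, marking every parent (and every filtered revision) as a non-head in the bitvec. A hedged Python rendering of the same pass, using a plain list of booleans and a toy `rev -> (p1, p2)` mapping with -1 standing in for the null revision:

    # toy linear-plus-branch graph: 0 <- 1 <- {2, 3}
    parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
    filtered = set()

    not_heads = [False] * len(parents)
    for rev in reversed(range(len(parents))):
        if not not_heads[rev] and rev in filtered:
            # a filtered rev is never a head; skip marking its parents
            not_heads[rev] = True
            continue
        for p in parents[rev]:
            if p != -1:
                not_heads[p] = True  # a parent is, by definition, not a head

    heads = [rev for rev, flag in enumerate(not_heads) if not flag]
    assert heads == [2, 3]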
--- a/rust/hg-core/src/matchers.rs Mon Feb 12 16:17:08 2024 +0100
+++ b/rust/hg-core/src/matchers.rs Mon Feb 12 16:22:47 2024 +0100
@@ -388,6 +388,15 @@
 /// assert_eq!(matcher.matches(HgPath::new(b"this should work")), true);
 /// assert_eq!(matcher.matches(HgPath::new(b"this also")), true);
 /// assert_eq!(matcher.matches(HgPath::new(b"but not this")), false);
+///
+/// let ignore_patterns =
+/// vec![IgnorePattern::new(PatternSyntax::RootFiles, b"dir/subdir", Path::new(""))];
+/// let matcher = IncludeMatcher::new(ignore_patterns).unwrap();
+///
+/// assert!(!matcher.matches(HgPath::new(b"file")));
+/// assert!(!matcher.matches(HgPath::new(b"dir/file")));
+/// assert!(matcher.matches(HgPath::new(b"dir/subdir/file")));
+/// assert!(!matcher.matches(HgPath::new(b"dir/subdir/subsubdir/file")));
 /// ```
 pub struct IncludeMatcher<'a> {
     patterns: Vec<u8>,
@@ -951,12 +960,8 @@

         let match_func = move |path: &HgPath| -> bool {
             let path = path.as_bytes();
-            let i = path.iter().rfind(|a| **a == b'/');
-            let dir = if let Some(i) = i {
-                &path[..*i as usize]
-            } else {
-                b"."
-            };
+            let i = path.iter().rposition(|a| *a == b'/');
+            let dir = if let Some(i) = i { &path[..i] } else { b"." };
             dirs.contains(dir)
         };
         match_funcs.push(Box::new(match_func));
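The `rposition` fix in the second hunk matters: the old `rfind` closure returned the matched byte value (always 47, the code for `b'/'`) rather than its position, and that value was then misused as a slice index. The intended computation is "everything before the last slash, or `.` when there is none"; in Python terms (`dirname` is a hypothetical helper name for illustration):

    def dirname(path: bytes) -> bytes:
        i = path.rfind(b'/')  # index of the last slash, or -1
        return path[:i] if i != -1 else b'.'

    assert dirname(b'dir/subdir/file') == b'dir/subdir'
    assert dirname(b'file') == b'.'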
--- a/rust/hg-core/src/operations/debugdata.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/operations/debugdata.rs Mon Feb 12 16:22:47 2024 +0100 @@ -6,11 +6,10 @@ // GNU General Public License version 2 or any later version. use crate::repo::Repo; -use crate::requirements; use crate::revlog::{Revlog, RevlogError}; /// Kind of data to debug -#[derive(Debug, Copy, Clone)] +#[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum DebugDataKind { Changelog, Manifest, @@ -26,11 +25,12 @@ DebugDataKind::Changelog => "00changelog.i", DebugDataKind::Manifest => "00manifest.i", }; - let use_nodemap = repo - .requirements() - .contains(requirements::NODEMAP_REQUIREMENT); - let revlog = - Revlog::open(&repo.store_vfs(), index_file, None, use_nodemap)?; + let revlog = Revlog::open( + &repo.store_vfs(), + index_file, + None, + repo.default_revlog_options(kind == DebugDataKind::Changelog)?, + )?; let rev = crate::revset::resolve_rev_number_or_hex_prefix(revset, &revlog)?; let data = revlog.get_rev_data_for_checked_rev(rev)?;
--- a/rust/hg-core/src/operations/mod.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/operations/mod.rs Mon Feb 12 16:22:47 2024 +0100 @@ -5,6 +5,8 @@ mod cat; mod debugdata; mod list_tracked_files; +mod status_rev_rev; pub use cat::{cat, CatOutput}; pub use debugdata::{debug_data, DebugDataKind}; pub use list_tracked_files::{list_rev_tracked_files, FilesForRev}; +pub use status_rev_rev::{status_rev_rev_no_copies, DiffStatus, StatusRevRev};
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rust/hg-core/src/operations/status_rev_rev.rs Mon Feb 12 16:22:47 2024 +0100 @@ -0,0 +1,89 @@ +use crate::errors::HgError; +use crate::matchers::Matcher; +use crate::repo::Repo; +use crate::revlog::manifest::Manifest; +use crate::utils::filter_map_results; +use crate::utils::hg_path::HgPath; +use crate::utils::merge_join_results_by; + +use crate::Revision; + +use itertools::EitherOrBoth; + +#[derive(Debug, Copy, Clone)] +pub enum DiffStatus { + Removed, + Added, + Matching, + Modified, +} + +pub struct StatusRevRev { + manifest1: Manifest, + manifest2: Manifest, + narrow_matcher: Box<dyn Matcher>, +} + +fn manifest_for_rev(repo: &Repo, rev: Revision) -> Result<Manifest, HgError> { + repo.manifest_for_rev(rev.into()).map_err(|e| { + HgError::corrupted(format!( + "manifest lookup failed for revision {}: {}", + rev, e + )) + }) +} + +pub fn status_rev_rev_no_copies( + repo: &Repo, + rev1: Revision, + rev2: Revision, + narrow_matcher: Box<dyn Matcher>, +) -> Result<StatusRevRev, HgError> { + let manifest1 = manifest_for_rev(repo, rev1)?; + let manifest2 = manifest_for_rev(repo, rev2)?; + Ok(StatusRevRev { + manifest1, + manifest2, + narrow_matcher, + }) +} + +impl StatusRevRev { + pub fn iter( + &self, + ) -> impl Iterator<Item = Result<(&HgPath, DiffStatus), HgError>> { + let iter1 = self.manifest1.iter(); + let iter2 = self.manifest2.iter(); + + let merged = + merge_join_results_by(iter1, iter2, |i1, i2| i1.path.cmp(i2.path)); + + filter_map_results(merged, |entry| { + let (path, status) = match entry { + EitherOrBoth::Left(entry) => { + let path = entry.path; + (path, DiffStatus::Removed) + } + EitherOrBoth::Right(entry) => { + let path = entry.path; + (path, DiffStatus::Added) + } + EitherOrBoth::Both(entry1, entry2) => { + let path = entry1.path; + if entry1.node_id().unwrap() == entry2.node_id().unwrap() + && entry1.flags == entry2.flags + { + (path, DiffStatus::Matching) + } else { + (path, DiffStatus::Modified) + } + } + }; + Ok(if self.narrow_matcher.matches(path) { + Some((path, status)) + } else { + None + }) + }) + } +}
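`StatusRevRev::iter` relies on both manifests being sorted by path: a single merge-join classifies every entry as Removed, Added, Matching, or Modified. A simplified runnable Python analogue (dict-based where the Rust code streams; `diff_manifests` and the `(path, node)` pairs are invented for illustration):

    def diff_manifests(m1, m2):
        d1, d2 = dict(m1), dict(m2)
        for path in sorted(d1.keys() | d2.keys()):
            if path not in d2:
                yield path, 'removed'
            elif path not in d1:
                yield path, 'added'
            elif d1[path] == d2[path]:
                yield path, 'matching'
            else:
                yield path, 'modified'

    m1 = [(b'a', b'node1'), (b'b', b'node2')]
    m2 = [(b'b', b'node2x'), (b'c', b'node3')]
    assert list(diff_manifests(m1, m2)) == [
        (b'a', 'removed'),
        (b'b', 'modified'),
        (b'c', 'added'),
    ]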
--- a/rust/hg-core/src/repo.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/repo.rs Mon Feb 12 16:22:47 2024 +0100 @@ -8,6 +8,10 @@ use crate::errors::{HgError, IoResultExt}; use crate::lock::{try_with_lock_no_wait, LockError}; use crate::manifest::{Manifest, Manifestlog}; +use crate::requirements::{ + CHANGELOGV2_REQUIREMENT, GENERALDELTA_REQUIREMENT, NODEMAP_REQUIREMENT, + REVLOGV1_REQUIREMENT, REVLOGV2_REQUIREMENT, +}; use crate::revlog::filelog::Filelog; use crate::revlog::RevlogError; use crate::utils::debug::debug_wait_for_file_or_print; @@ -15,8 +19,10 @@ use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::{is_dir, is_file, Vfs}; -use crate::DirstateError; -use crate::{requirements, NodePrefix, UncheckedRevision}; +use crate::{ + requirements, NodePrefix, RevlogVersionOptions, UncheckedRevision, +}; +use crate::{DirstateError, RevlogOpenOptions}; use std::cell::{Ref, RefCell, RefMut}; use std::collections::HashSet; use std::io::Seek; @@ -523,7 +529,7 @@ } fn new_changelog(&self) -> Result<Changelog, HgError> { - Changelog::open(&self.store_vfs(), self.has_nodemap()) + Changelog::open(&self.store_vfs(), self.default_revlog_options(true)?) } pub fn changelog(&self) -> Result<Ref<Changelog>, HgError> { @@ -535,7 +541,10 @@ } fn new_manifestlog(&self) -> Result<Manifestlog, HgError> { - Manifestlog::open(&self.store_vfs(), self.has_nodemap()) + Manifestlog::open( + &self.store_vfs(), + self.default_revlog_options(false)?, + ) } pub fn manifestlog(&self) -> Result<Ref<Manifestlog>, HgError> { @@ -581,7 +590,7 @@ } pub fn filelog(&self, path: &HgPath) -> Result<Filelog, HgError> { - Filelog::open(self, path) + Filelog::open(self, path, self.default_revlog_options(false)?) } /// Write to disk any updates that were made through `dirstate_map_mut`. @@ -730,6 +739,35 @@ } Ok(()) } + + pub fn default_revlog_options( + &self, + changelog: bool, + ) -> Result<RevlogOpenOptions, HgError> { + let requirements = self.requirements(); + let version = if changelog + && requirements.contains(CHANGELOGV2_REQUIREMENT) + { + let compute_rank = self + .config() + .get_bool(b"experimental", b"changelog-v2.compute-rank")?; + RevlogVersionOptions::ChangelogV2 { compute_rank } + } else if requirements.contains(REVLOGV2_REQUIREMENT) { + RevlogVersionOptions::V2 + } else if requirements.contains(REVLOGV1_REQUIREMENT) { + RevlogVersionOptions::V1 { + generaldelta: requirements.contains(GENERALDELTA_REQUIREMENT), + } + } else { + RevlogVersionOptions::V0 + }; + Ok(RevlogOpenOptions { + version, + // We don't need to dance around the slow path like in the Python + // implementation since we know we have access to the fast code. + use_nodemap: requirements.contains(NODEMAP_REQUIREMENT), + }) + } } /// Lazily-initialized component of `Repo` with interior mutability
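`default_revlog_options` picks the revlog format from the repository requirements, with changelog-v2 taking precedence for changelogs. A hedged Python mirror of that precedence; the requirement strings are the real ones from `requirements.rs`, but the returned tuples are invented for illustration:

    def default_revlog_options(requirements, changelog=False,
                               compute_rank=False):
        if changelog and 'exp-changelog-v2' in requirements:
            return ('changelog-v2', {'compute_rank': compute_rank})
        if 'exp-revlogv2.1' in requirements:
            return ('v2', {})
        if 'revlogv1' in requirements:
            return ('v1', {'generaldelta': 'generaldelta' in requirements})
        return ('v0', {})

    assert default_revlog_options({'revlogv1', 'generaldelta'}) == (
        'v1', {'generaldelta': True})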
--- a/rust/hg-core/src/requirements.rs Mon Feb 12 16:17:08 2024 +0100
+++ b/rust/hg-core/src/requirements.rs Mon Feb 12 16:22:47 2024 +0100
@@ -77,7 +77,7 @@

 /// rhg supports repository with or without these
 const SUPPORTED: &[&str] = &[
-    "generaldelta",
+    GENERALDELTA_REQUIREMENT,
     SHARED_REQUIREMENT,
     SHARESAFE_REQUIREMENT,
     SPARSEREVLOG_REQUIREMENT,
@@ -100,6 +100,7 @@

 // Copied from mercurial/requirements.py:
 pub const DIRSTATE_V2_REQUIREMENT: &str = "dirstate-v2";
+pub const GENERALDELTA_REQUIREMENT: &str = "generaldelta";

 /// A repository that uses the tracked hint dirstate file
 #[allow(unused)]
@@ -128,11 +129,20 @@
 #[allow(unused)]
 pub const TREEMANIFEST_REQUIREMENT: &str = "treemanifest";

+/// Whether to use the "RevlogNG" or V1 of the revlog format
+#[allow(unused)]
+pub const REVLOGV1_REQUIREMENT: &str = "revlogv1";
+
 /// Increment the sub-version when the revlog v2 format changes to lock out old
 /// clients.
 #[allow(unused)]
 pub const REVLOGV2_REQUIREMENT: &str = "exp-revlogv2.1";

+/// Increment the sub-version when the changelog v2 format changes to lock out
+/// old clients.
+#[allow(unused)]
+pub const CHANGELOGV2_REQUIREMENT: &str = "exp-changelog-v2";
+
 /// A repository with the sparserevlog feature will have delta chains that
 /// can spread over a larger span. Sparse reading cuts these large spans into
 /// pieces, so that each piece isn't too big.
--- a/rust/hg-core/src/revlog/changelog.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/revlog/changelog.rs Mon Feb 12 16:22:47 2024 +0100 @@ -4,7 +4,7 @@ use crate::revlog::{Revlog, RevlogEntry, RevlogError}; use crate::utils::hg_path::HgPath; use crate::vfs::Vfs; -use crate::{Graph, GraphError, UncheckedRevision}; +use crate::{Graph, GraphError, RevlogOpenOptions, UncheckedRevision}; use itertools::{Either, Itertools}; use std::ascii::escape_default; use std::borrow::Cow; @@ -19,9 +19,11 @@ impl Changelog { /// Open the `changelog` of a repository given by its root. - pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { - let revlog = - Revlog::open(store_vfs, "00changelog.i", None, use_nodemap)?; + pub fn open( + store_vfs: &Vfs, + options: RevlogOpenOptions, + ) -> Result<Self, HgError> { + let revlog = Revlog::open(store_vfs, "00changelog.i", None, options)?; Ok(Self { revlog }) } @@ -349,7 +351,9 @@ let temp = tempfile::tempdir().unwrap(); let vfs = Vfs { base: temp.path() }; std::fs::write(temp.path().join("foo.i"), b"").unwrap(); - let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); + let revlog = + Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new()) + .unwrap(); let changelog = Changelog { revlog }; assert_eq!(
--- a/rust/hg-core/src/revlog/filelog.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/revlog/filelog.rs Mon Feb 12 16:22:47 2024 +0100 @@ -11,6 +11,7 @@ use crate::utils::SliceExt; use crate::Graph; use crate::GraphError; +use crate::RevlogOpenOptions; use crate::UncheckedRevision; use std::path::PathBuf; @@ -30,16 +31,21 @@ pub fn open_vfs( store_vfs: &crate::vfs::Vfs<'_>, file_path: &HgPath, + options: RevlogOpenOptions, ) -> Result<Self, HgError> { let index_path = store_path(file_path, b".i"); let data_path = store_path(file_path, b".d"); let revlog = - Revlog::open(store_vfs, index_path, Some(&data_path), false)?; + Revlog::open(store_vfs, index_path, Some(&data_path), options)?; Ok(Self { revlog }) } - pub fn open(repo: &Repo, file_path: &HgPath) -> Result<Self, HgError> { - Self::open_vfs(&repo.store_vfs(), file_path) + pub fn open( + repo: &Repo, + file_path: &HgPath, + options: RevlogOpenOptions, + ) -> Result<Self, HgError> { + Self::open_vfs(&repo.store_vfs(), file_path, options) } /// The given node ID is that of the file as found in a filelog, not of a
--- a/rust/hg-core/src/revlog/index.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/revlog/index.rs Mon Feb 12 16:22:47 2024 +0100 @@ -1,17 +1,28 @@ +use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::ops::Deref; +use std::sync::{RwLock, RwLockReadGuard, RwLockWriteGuard}; +use bitvec::prelude::*; use byteorder::{BigEndian, ByteOrder}; +use bytes_cast::{unaligned, BytesCast}; +use super::REVIDX_KNOWN_FLAGS; use crate::errors::HgError; +use crate::node::{NODE_BYTES_LENGTH, NULL_NODE, STORED_NODE_ID_BYTES}; use crate::revlog::node::Node; use crate::revlog::{Revision, NULL_REVISION}; -use crate::{Graph, GraphError, RevlogIndex, UncheckedRevision}; +use crate::{ + dagops, BaseRevision, FastHashMap, Graph, GraphError, RevlogError, + RevlogIndex, UncheckedRevision, +}; pub const INDEX_ENTRY_SIZE: usize = 64; +pub const COMPRESSION_MODE_INLINE: u8 = 2; +#[derive(Debug)] pub struct IndexHeader { - header_bytes: [u8; 4], + pub(super) header_bytes: [u8; 4], } #[derive(Copy, Clone)] @@ -50,42 +61,213 @@ BigEndian::read_u16(&self.header_bytes[2..4]) } - const EMPTY_INDEX_HEADER: IndexHeader = IndexHeader { - // We treat an empty file as a valid index with no entries. - // Here we make an arbitrary choice of what we assume the format of the - // index to be (V1, using generaldelta). - // This doesn't matter too much, since we're only doing read-only - // access. but the value corresponds to the `new_header` variable in - // `revlog.py`, `_loadindex` - header_bytes: [0, 3, 0, 1], - }; - - fn parse(index_bytes: &[u8]) -> Result<IndexHeader, HgError> { + pub fn parse(index_bytes: &[u8]) -> Result<Option<IndexHeader>, HgError> { if index_bytes.is_empty() { - return Ok(IndexHeader::EMPTY_INDEX_HEADER); + return Ok(None); } if index_bytes.len() < 4 { return Err(HgError::corrupted( "corrupted revlog: can't read the index format header", )); } - Ok(IndexHeader { + Ok(Some(IndexHeader { header_bytes: { let bytes: [u8; 4] = index_bytes[0..4].try_into().expect("impossible"); bytes }, - }) + })) + } +} + +/// Abstracts the access to the index bytes since they can be spread between +/// the immutable (bytes) part and the mutable (added) part if any appends +/// happened. This makes it transparent for the callers. +struct IndexData { + /// Immutable bytes, most likely taken from disk + bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>, + /// Used when stripping index contents, keeps track of the start of the + /// first stripped revision, which is used to give a slice of the + /// `bytes` field. 
+ truncation: Option<usize>, + /// Bytes that were added after reading the index + added: Vec<u8>, +} + +impl IndexData { + pub fn new(bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>) -> Self { + Self { + bytes, + truncation: None, + added: vec![], + } + } + + pub fn len(&self) -> usize { + match self.truncation { + Some(truncation) => truncation + self.added.len(), + None => self.bytes.len() + self.added.len(), + } + } + + fn remove( + &mut self, + rev: Revision, + offsets: Option<&[usize]>, + ) -> Result<(), RevlogError> { + let rev = rev.0 as usize; + let truncation = if let Some(offsets) = offsets { + offsets[rev] + } else { + rev * INDEX_ENTRY_SIZE + }; + if truncation < self.bytes.len() { + self.truncation = Some(truncation); + self.added.clear(); + } else { + self.added.truncate(truncation - self.bytes.len()); + } + Ok(()) + } + + fn is_new(&self) -> bool { + self.bytes.is_empty() + } +} + +impl std::ops::Index<std::ops::Range<usize>> for IndexData { + type Output = [u8]; + + fn index(&self, index: std::ops::Range<usize>) -> &Self::Output { + let start = index.start; + let end = index.end; + let immutable_len = match self.truncation { + Some(truncation) => truncation, + None => self.bytes.len(), + }; + if start < immutable_len { + if end > immutable_len { + panic!("index data cannot span existing and added ranges"); + } + &self.bytes[index] + } else { + &self.added[start - immutable_len..end - immutable_len] + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub struct RevisionDataParams { + pub flags: u16, + pub data_offset: u64, + pub data_compressed_length: i32, + pub data_uncompressed_length: i32, + pub data_delta_base: i32, + pub link_rev: i32, + pub parent_rev_1: i32, + pub parent_rev_2: i32, + pub node_id: [u8; NODE_BYTES_LENGTH], + pub _sidedata_offset: u64, + pub _sidedata_compressed_length: i32, + pub data_compression_mode: u8, + pub _sidedata_compression_mode: u8, + pub _rank: i32, +} + +impl Default for RevisionDataParams { + fn default() -> Self { + Self { + flags: 0, + data_offset: 0, + data_compressed_length: 0, + data_uncompressed_length: 0, + data_delta_base: -1, + link_rev: -1, + parent_rev_1: -1, + parent_rev_2: -1, + node_id: [0; NODE_BYTES_LENGTH], + _sidedata_offset: 0, + _sidedata_compressed_length: 0, + data_compression_mode: COMPRESSION_MODE_INLINE, + _sidedata_compression_mode: COMPRESSION_MODE_INLINE, + _rank: -1, + } + } +} + +#[derive(BytesCast)] +#[repr(C)] +pub struct RevisionDataV1 { + data_offset_or_flags: unaligned::U64Be, + data_compressed_length: unaligned::I32Be, + data_uncompressed_length: unaligned::I32Be, + data_delta_base: unaligned::I32Be, + link_rev: unaligned::I32Be, + parent_rev_1: unaligned::I32Be, + parent_rev_2: unaligned::I32Be, + node_id: [u8; STORED_NODE_ID_BYTES], +} + +fn _static_assert_size_of_revision_data_v1() { + let _ = std::mem::transmute::<RevisionDataV1, [u8; 64]>; +} + +impl RevisionDataParams { + pub fn validate(&self) -> Result<(), RevlogError> { + if self.flags & !REVIDX_KNOWN_FLAGS != 0 { + return Err(RevlogError::corrupted(format!( + "unknown revlog index flags: {}", + self.flags + ))); + } + if self.data_compression_mode != COMPRESSION_MODE_INLINE { + return Err(RevlogError::corrupted(format!( + "invalid data compression mode: {}", + self.data_compression_mode + ))); + } + // FIXME isn't this only for v2 or changelog v2? 
+ if self._sidedata_compression_mode != COMPRESSION_MODE_INLINE { + return Err(RevlogError::corrupted(format!( + "invalid sidedata compression mode: {}", + self._sidedata_compression_mode + ))); + } + Ok(()) + } + + pub fn into_v1(self) -> RevisionDataV1 { + let data_offset_or_flags = self.data_offset << 16 | self.flags as u64; + let mut node_id = [0; STORED_NODE_ID_BYTES]; + node_id[..NODE_BYTES_LENGTH].copy_from_slice(&self.node_id); + RevisionDataV1 { + data_offset_or_flags: data_offset_or_flags.into(), + data_compressed_length: self.data_compressed_length.into(), + data_uncompressed_length: self.data_uncompressed_length.into(), + data_delta_base: self.data_delta_base.into(), + link_rev: self.link_rev.into(), + parent_rev_1: self.parent_rev_1.into(), + parent_rev_2: self.parent_rev_2.into(), + node_id, + } } } /// A Revlog index pub struct Index { - bytes: Box<dyn Deref<Target = [u8]> + Send>, + bytes: IndexData, /// Offsets of starts of index blocks. /// Only needed when the index is interleaved with data. - offsets: Option<Vec<usize>>, + offsets: RwLock<Option<Vec<usize>>>, uses_generaldelta: bool, + is_inline: bool, + /// Cache of (head_revisions, filtered_revisions) + /// + /// The head revisions in this index, kept in sync. Should + /// be accessed via the [`Self::head_revs`] method. + /// The last filtered revisions in this index, used to make sure + /// we haven't changed filters when returning the cached `head_revs`. + head_revs: RwLock<(Vec<Revision>, HashSet<Revision>)>, } impl Debug for Index { @@ -98,6 +280,7 @@ } impl Graph for Index { + #[inline(always)] fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> { let err = || GraphError::ParentOutOfRange(rev); match self.get_entry(rev) { @@ -114,13 +297,44 @@ } } +/// A cache suitable for find_snapshots +/// +/// Logically equivalent to a mapping whose keys are [`BaseRevision`] and +/// values sets of [`BaseRevision`] +/// +/// TODO the dubious part is insisting that errors must be RevlogError +/// we would probably need to sprinkle some magic here, such as an associated +/// type that would be Into<RevlogError> but even that would not be +/// satisfactory, as errors potentially have nothing to do with the revlog. +pub trait SnapshotsCache { + fn insert_for( + &mut self, + rev: BaseRevision, + value: BaseRevision, + ) -> Result<(), RevlogError>; +} + +impl SnapshotsCache for FastHashMap<BaseRevision, HashSet<BaseRevision>> { + fn insert_for( + &mut self, + rev: BaseRevision, + value: BaseRevision, + ) -> Result<(), RevlogError> { + let all_values = self.entry(rev).or_default(); + all_values.insert(value); + Ok(()) + } +} + impl Index { /// Create an index from bytes. /// Calculate the start of each entry when is_inline is true. 
pub fn new( - bytes: Box<dyn Deref<Target = [u8]> + Send>, + bytes: Box<dyn Deref<Target = [u8]> + Send + Sync>, + default_header: IndexHeader, ) -> Result<Self, HgError> { - let header = IndexHeader::parse(bytes.as_ref())?; + let header = + IndexHeader::parse(bytes.as_ref())?.unwrap_or(default_header); if header.format_version() != IndexHeader::REVLOGV1 { // A proper new version should have had a repo/store @@ -150,18 +364,22 @@ if offset == bytes.len() { Ok(Self { - bytes, - offsets: Some(offsets), + bytes: IndexData::new(bytes), + offsets: RwLock::new(Some(offsets)), uses_generaldelta, + is_inline: true, + head_revs: RwLock::new((vec![], HashSet::new())), }) } else { Err(HgError::corrupted("unexpected inline revlog length")) } } else { Ok(Self { - bytes, - offsets: None, + bytes: IndexData::new(bytes), + offsets: RwLock::new(None), uses_generaldelta, + is_inline: false, + head_revs: RwLock::new((vec![], HashSet::new())), }) } } @@ -172,7 +390,7 @@ /// Value of the inline flag. pub fn is_inline(&self) -> bool { - self.offsets.is_some() + self.is_inline } /// Return a slice of bytes if `revlog` is inline. Panic if not. @@ -185,36 +403,111 @@ /// Return number of entries of the revlog index. pub fn len(&self) -> usize { - if let Some(offsets) = &self.offsets { - offsets.len() + if self.is_inline() { + (*self.get_offsets()) + .as_ref() + .expect("inline should have offsets") + .len() } else { self.bytes.len() / INDEX_ENTRY_SIZE } } + pub fn get_offsets(&self) -> RwLockReadGuard<Option<Vec<usize>>> { + assert!(self.is_inline()); + { + // Wrap in a block to drop the read guard + // TODO perf? + let mut offsets = self.offsets.write().unwrap(); + if offsets.is_none() { + offsets.replace(inline_scan(&self.bytes.bytes).1); + } + } + self.offsets.read().unwrap() + } + + pub fn get_offsets_mut(&mut self) -> RwLockWriteGuard<Option<Vec<usize>>> { + assert!(self.is_inline()); + let mut offsets = self.offsets.write().unwrap(); + if offsets.is_none() { + offsets.replace(inline_scan(&self.bytes.bytes).1); + } + offsets + } + /// Returns `true` if the `Index` has zero `entries`. pub fn is_empty(&self) -> bool { self.len() == 0 } - /// Return the index entry corresponding to the given revision if it - /// exists. + /// Return the index entry corresponding to the given revision or `None` + /// for [`NULL_REVISION`] + /// + /// The specified revision being of the checked type, it always exists + /// if it was validated by this index. pub fn get_entry(&self, rev: Revision) -> Option<IndexEntry> { if rev == NULL_REVISION { return None; } - Some(if let Some(offsets) = &self.offsets { - self.get_entry_inline(rev, offsets) + Some(if self.is_inline() { + self.get_entry_inline(rev) } else { self.get_entry_separated(rev) }) } - fn get_entry_inline( + /// Return the binary content of the index entry for the given revision + /// + /// See [get_entry()](`Self::get_entry()`) for cases when `None` is + /// returned. + pub fn entry_binary(&self, rev: Revision) -> Option<&[u8]> { + self.get_entry(rev).map(|e| { + let bytes = e.as_bytes(); + if rev.0 == 0 { + &bytes[4..] 
+ } else { + bytes + } + }) + } + + pub fn entry_as_params( &self, - rev: Revision, - offsets: &[usize], - ) -> IndexEntry { + rev: UncheckedRevision, + ) -> Option<RevisionDataParams> { + let rev = self.check_revision(rev)?; + self.get_entry(rev).map(|e| RevisionDataParams { + flags: e.flags(), + data_offset: if rev.0 == 0 && !self.bytes.is_new() { + e.flags() as u64 + } else { + e.raw_offset() + }, + data_compressed_length: e + .compressed_len() + .try_into() + .unwrap_or_else(|_| { + // Python's `unionrepo` sets the compressed length to be + // `-1` (or `u32::MAX` if transmuted to `u32`) because it + // cannot know the correct compressed length of a given + // revision. I'm not sure if this is true, but having this + // edge case won't hurt other use cases, let's handle it. + assert_eq!(e.compressed_len(), u32::MAX); + NULL_REVISION.0 + }), + data_uncompressed_length: e.uncompressed_len(), + data_delta_base: e.base_revision_or_base_of_delta_chain().0, + link_rev: e.link_revision().0, + parent_rev_1: e.p1().0, + parent_rev_2: e.p2().0, + node_id: e.hash().as_bytes().try_into().unwrap(), + ..Default::default() + }) + } + + fn get_entry_inline(&self, rev: Revision) -> IndexEntry { + let offsets = &self.get_offsets(); + let offsets = offsets.as_ref().expect("inline should have offsets"); let start = offsets[rev.0 as usize]; let end = start + INDEX_ENTRY_SIZE; let bytes = &self.bytes[start..end]; @@ -242,6 +535,1087 @@ offset_override, } } + + fn null_entry(&self) -> IndexEntry { + IndexEntry { + bytes: &[0; INDEX_ENTRY_SIZE], + offset_override: Some(0), + } + } + + /// Return the head revisions of this index + pub fn head_revs(&self) -> Result<Vec<Revision>, GraphError> { + self.head_revs_filtered(&HashSet::new(), false) + .map(|h| h.unwrap()) + } + + /// Python-specific shortcut to save on PyList creation + pub fn head_revs_shortcut( + &self, + ) -> Result<Option<Vec<Revision>>, GraphError> { + self.head_revs_filtered(&HashSet::new(), true) + } + + /// Return the head revisions of this index + pub fn head_revs_filtered( + &self, + filtered_revs: &HashSet<Revision>, + py_shortcut: bool, + ) -> Result<Option<Vec<Revision>>, GraphError> { + { + let guard = self + .head_revs + .read() + .expect("RwLock on Index.head_revs should not be poisoned"); + let self_head_revs = &guard.0; + let self_filtered_revs = &guard.1; + if !self_head_revs.is_empty() + && filtered_revs == self_filtered_revs + { + if py_shortcut { + // Don't copy the revs since we've already cached them + // on the Python side. + return Ok(None); + } else { + return Ok(Some(self_head_revs.to_owned())); + } + } + } + + let as_vec = if self.is_empty() { + vec![NULL_REVISION] + } else { + let mut not_heads = bitvec![0; self.len()]; + dagops::retain_heads_fast( + self, + not_heads.as_mut_bitslice(), + filtered_revs, + )?; + not_heads + .into_iter() + .enumerate() + .filter_map(|(idx, is_not_head)| { + if is_not_head { + None + } else { + Some(Revision(idx as BaseRevision)) + } + }) + .collect() + }; + *self + .head_revs + .write() + .expect("RwLock on Index.head_revs should not be poisoned") = + (as_vec.to_owned(), filtered_revs.to_owned()); + Ok(Some(as_vec)) + } + + /// Obtain the delta chain for a revision. + /// + /// `stop_rev` specifies a revision to stop at. If not specified, we + /// stop at the base of the chain. + /// + /// Returns a 2-tuple of (chain, stopped) where `chain` is a vec of + /// revs in ascending order and `stopped` is a bool indicating whether + /// `stoprev` was hit. 
+ pub fn delta_chain(
+ &self,
+ rev: Revision,
+ stop_rev: Option<Revision>,
+ using_general_delta: Option<bool>,
+ ) -> Result<(Vec<Revision>, bool), HgError> {
+ let mut current_rev = rev;
+ let mut entry = self.get_entry(rev).unwrap();
+ let mut chain = vec![];
+ let using_general_delta =
+ using_general_delta.unwrap_or_else(|| self.uses_generaldelta());
+ while current_rev.0 != entry.base_revision_or_base_of_delta_chain().0
+ && stop_rev.map(|r| r != current_rev).unwrap_or(true)
+ {
+ chain.push(current_rev);
+ let new_rev = if using_general_delta {
+ entry.base_revision_or_base_of_delta_chain()
+ } else {
+ UncheckedRevision(current_rev.0 - 1)
+ };
+ current_rev = self.check_revision(new_rev).ok_or_else(|| {
+ HgError::corrupted(format!("Revision {new_rev} out of range"))
+ })?;
+ if current_rev.0 == NULL_REVISION.0 {
+ break;
+ }
+ entry = self.get_entry(current_rev).unwrap()
+ }
+
+ let stopped = if stop_rev.map(|r| current_rev == r).unwrap_or(false) {
+ true
+ } else {
+ chain.push(current_rev);
+ false
+ };
+ chain.reverse();
+ Ok((chain, stopped))
+ }
+
+ pub fn find_snapshots(
+ &self,
+ start_rev: UncheckedRevision,
+ end_rev: UncheckedRevision,
+ cache: &mut impl SnapshotsCache,
+ ) -> Result<(), RevlogError> {
+ let mut start_rev = start_rev.0;
+ let mut end_rev = end_rev.0;
+ end_rev += 1;
+ let len = self.len().try_into().unwrap();
+ if end_rev > len {
+ end_rev = len;
+ }
+ if start_rev < 0 {
+ start_rev = 0;
+ }
+ for rev in start_rev..end_rev {
+ if !self.is_snapshot_unchecked(Revision(rev))? {
+ continue;
+ }
+ let mut base = self
+ .get_entry(Revision(rev))
+ .unwrap()
+ .base_revision_or_base_of_delta_chain();
+ if base.0 == rev {
+ base = NULL_REVISION.into();
+ }
+ cache.insert_for(base.0, rev)?;
+ }
+ Ok(())
+ }
+
+ fn clear_head_revs(&self) {
+ self.head_revs
+ .write()
+ .expect("RwLock on Index.head_revs should not be poisoned")
+ .0
+ .clear()
+ }
+
+ /// TODO move this to the trait probably, along with other things
+ pub fn append(
+ &mut self,
+ revision_data: RevisionDataParams,
+ ) -> Result<(), RevlogError> {
+ revision_data.validate()?;
+ if self.is_inline() {
+ let new_offset = self.bytes.len();
+ if let Some(offsets) = &mut *self.get_offsets_mut() {
+ offsets.push(new_offset)
+ }
+ }
+ self.bytes.added.extend(revision_data.into_v1().as_bytes());
+ self.clear_head_revs();
+ Ok(())
+ }
+
+ pub fn pack_header(&self, header: i32) -> [u8; 4] {
+ header.to_be_bytes()
+ }
+
+ pub fn remove(&mut self, rev: Revision) -> Result<(), RevlogError> {
+ let offsets = if self.is_inline() {
+ self.get_offsets().clone()
+ } else {
+ None
+ };
+ self.bytes.remove(rev, offsets.as_deref())?;
+ if self.is_inline() {
+ if let Some(offsets) = &mut *self.get_offsets_mut() {
+ offsets.truncate(rev.0 as usize)
+ }
+ }
+ self.clear_head_revs();
+ Ok(())
+ }
+
+ pub fn clear_caches(&self) {
+ // We need to get the 'inline' value from Python at init and use this
+ // instead of offsets to determine whether we're inline since we might
+ // clear caches. This implies re-populating the offsets on-demand.
+ *self
+ .offsets
+ .write()
+ .expect("RwLock on Index.offsets should not be poisoned") = None;
+ self.clear_head_revs();
+ }
+
+ /// Unchecked version of `is_snapshot`.
+ /// Assumes the caller checked that `rev` is within a valid revision range.
+ pub fn is_snapshot_unchecked(
+ &self,
+ mut rev: Revision,
+ ) -> Result<bool, RevlogError> {
+ while rev.0 >= 0 {
+ let entry = self.get_entry(rev).unwrap();
+ let mut base = entry.base_revision_or_base_of_delta_chain().0;
+ if base == rev.0 {
+ base = NULL_REVISION.0;
+ }
+ if base == NULL_REVISION.0 {
+ return Ok(true);
+ }
+ let [mut p1, mut p2] = self
+ .parents(rev)
+ .map_err(|_| RevlogError::InvalidRevision)?;
+ while let Some(p1_entry) = self.get_entry(p1) {
+ if p1_entry.compressed_len() != 0 || p1.0 == 0 {
+ break;
+ }
+ let parent_base =
+ p1_entry.base_revision_or_base_of_delta_chain();
+ if parent_base.0 == p1.0 {
+ break;
+ }
+ p1 = self
+ .check_revision(parent_base)
+ .ok_or(RevlogError::InvalidRevision)?;
+ }
+ while let Some(p2_entry) = self.get_entry(p2) {
+ if p2_entry.compressed_len() != 0 || p2.0 == 0 {
+ break;
+ }
+ let parent_base =
+ p2_entry.base_revision_or_base_of_delta_chain();
+ if parent_base.0 == p2.0 {
+ break;
+ }
+ p2 = self
+ .check_revision(parent_base)
+ .ok_or(RevlogError::InvalidRevision)?;
+ }
+ if base == p1.0 || base == p2.0 {
+ return Ok(false);
+ }
+ rev = self
+ .check_revision(base.into())
+ .ok_or(RevlogError::InvalidRevision)?;
+ }
+ Ok(rev == NULL_REVISION)
+ }
+
+ /// Return whether the given revision is a snapshot. Returns an error if
+ /// `rev` is not within a valid revision range.
+ pub fn is_snapshot(
+ &self,
+ rev: UncheckedRevision,
+ ) -> Result<bool, RevlogError> {
+ let rev = self
+ .check_revision(rev)
+ .ok_or_else(|| RevlogError::corrupted("revision out of range"))?;
+ self.is_snapshot_unchecked(rev)
+ }
+
+ /// Slice revs to reduce the amount of unrelated data to be read from disk.
+ ///
+ /// The index is sliced into groups that should each be read in one go.
+ ///
+ /// The initial chunk is sliced until the overall density
+ /// (payload/chunks-span ratio) is above `target_density`.
+ /// No gap smaller than `min_gap_size` is skipped.
+ pub fn slice_chunk_to_density(
+ &self,
+ revs: &[Revision],
+ target_density: f64,
+ min_gap_size: usize,
+ ) -> Vec<Vec<Revision>> {
+ if revs.is_empty() {
+ return vec![];
+ }
+ if revs.len() == 1 {
+ return vec![revs.to_owned()];
+ }
+ let delta_chain_span = self.segment_span(revs);
+ if delta_chain_span < min_gap_size {
+ return vec![revs.to_owned()];
+ }
+ let entries: Vec<_> = revs
+ .iter()
+ .map(|r| {
+ (*r, self.get_entry(*r).unwrap_or_else(|| self.null_entry()))
+ })
+ .collect();
+
+ let mut read_data = delta_chain_span;
+ let chain_payload: u32 =
+ entries.iter().map(|(_r, e)| e.compressed_len()).sum();
+ let mut density = if delta_chain_span > 0 {
+ chain_payload as f64 / delta_chain_span as f64
+ } else {
+ 1.0
+ };
+
+ if density >= target_density {
+ return vec![revs.to_owned()];
+ }
+
+ // Collect the gaps; they are sorted below and popped from largest
+ // to smallest
+ let mut gaps = Vec::new();
+ let mut previous_end = None;
+
+ for (i, (_rev, entry)) in entries.iter().enumerate() {
+ let start = entry.c_start() as usize;
+ let length = entry.compressed_len();
+
+ // Skip empty revisions to form larger holes
+ if length == 0 {
+ continue;
+ }
+
+ if let Some(end) = previous_end {
+ let gap_size = start - end;
+ // Only consider holes that are large enough
+ if gap_size > min_gap_size {
+ gaps.push((gap_size, i));
+ }
+ }
+ previous_end = Some(start + length as usize);
+ }
+ if gaps.is_empty() {
+ return vec![revs.to_owned()];
+ }
+ // Sort the gaps so they can be popped from largest to smallest
+ gaps.sort_unstable();
+
+ // Collect the indices of the largest holes until
+ // the density is acceptable
+ let mut selected = vec![];
+ while let Some((gap_size, gap_id)) = gaps.pop() {
+ if density >= target_density {
+ break;
+ }
+ selected.push(gap_id);
+
+ // Skipping this gap reduces the amount of data that has to be read
+ read_data -= gap_size;
+ density = if read_data > 0 {
+ chain_payload as f64 / read_data as f64
+ } else {
+ 1.0
+ };
+ if density >= target_density {
+ break;
+ }
+ }
+ selected.sort_unstable();
+ selected.push(revs.len());
+
+ // Cut the revs at collected indices
+ let mut previous_idx = 0;
+ let mut chunks = vec![];
+ for idx in selected {
+ let chunk = self.trim_chunk(&entries, previous_idx, idx);
+ if !chunk.is_empty() {
+ chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
+ }
+ previous_idx = idx;
+ }
+ let chunk = self.trim_chunk(&entries, previous_idx, entries.len());
+ if !chunk.is_empty() {
+ chunks.push(chunk.iter().map(|(rev, _entry)| *rev).collect());
+ }
+
+ chunks
+ }
+
+ /// Get the byte span of a segment of sorted revisions.
+ ///
+ /// Occurrences of [`NULL_REVISION`] are ignored at the beginning of
+ /// the `revs` segment.
+ ///
+ /// panics:
+ /// - if `revs` is empty or only made of `NULL_REVISION`
+ /// - if the entry for the last or first non-null element of `revs`
+ /// cannot be retrieved.
+ fn segment_span(&self, revs: &[Revision]) -> usize {
+ if revs.is_empty() {
+ return 0;
+ }
+ let last_entry = &self.get_entry(revs[revs.len() - 1]).unwrap();
+ let end = last_entry.c_start() + last_entry.compressed_len() as u64;
+ let first_rev = revs.iter().find(|r| r.0 != NULL_REVISION.0).unwrap();
+ let start = if first_rev.0 == 0 {
+ 0
+ } else {
+ self.get_entry(*first_rev).unwrap().c_start()
+ };
+ (end - start) as usize
+ }
+
+ /// Returns `&revs[start..end]` without empty trailing revs
+ fn trim_chunk<'a>(
+ &'a self,
+ revs: &'a [(Revision, IndexEntry)],
+ start: usize,
+ mut end: usize,
+ ) -> &'a [(Revision, IndexEntry)] {
+ // Trim empty revs at the end, except the very first rev of a chain
+ let last_rev = revs[end - 1].0;
+ if last_rev.0 < self.len() as BaseRevision {
+ while end > 1
+ && end > start
+ && revs[end - 1].1.compressed_len() == 0
+ {
+ end -= 1
+ }
+ }
+ &revs[start..end]
+ }
+
+ /// Computes the set of revisions for each non-public phase from `roots`,
+ /// which are the last known roots for each non-public phase.
+ pub fn compute_phases_map_sets(
+ &self,
+ roots: HashMap<Phase, Vec<Revision>>,
+ ) -> Result<(usize, RootsPerPhase), GraphError> {
+ let mut phases = HashMap::new();
+ let mut min_phase_rev = NULL_REVISION;
+
+ for phase in Phase::non_public_phases() {
+ if let Some(phase_roots) = roots.get(phase) {
+ let min_rev =
+ self.add_roots_get_min(phase_roots, &mut phases, *phase);
+ if min_rev != NULL_REVISION
+ && (min_phase_rev == NULL_REVISION
+ || min_rev < min_phase_rev)
+ {
+ min_phase_rev = min_rev;
+ }
+ } else {
+ continue;
+ };
+ }
+ let mut phase_sets: RootsPerPhase = Default::default();
+
+ if min_phase_rev == NULL_REVISION {
+ min_phase_rev = Revision(self.len() as BaseRevision);
+ }
+
+ for rev in min_phase_rev.0..self.len() as BaseRevision {
+ let rev = Revision(rev);
+ let [p1, p2] = self.parents(rev)?;
+
+ const DEFAULT_PHASE: &Phase = &Phase::Public;
+ if p1.0 >= 0
+ && phases.get(&p1).unwrap_or(DEFAULT_PHASE)
+ > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
+ {
+ phases.insert(rev, phases[&p1]);
+ }
+ if p2.0 >= 0
+ && phases.get(&p2).unwrap_or(DEFAULT_PHASE)
+ > phases.get(&rev).unwrap_or(DEFAULT_PHASE)
+ {
+ phases.insert(rev, phases[&p2]);
+ }
+ let set = match phases.get(&rev).unwrap_or(DEFAULT_PHASE) {
+ Phase::Public => continue,
+ phase => &mut phase_sets[*phase as usize - 1],
+ };
+ set.insert(rev);
+ }
+
+ Ok((self.len(), phase_sets))
+ }
+
+ fn add_roots_get_min(
+ &self,
+ phase_roots: &[Revision],
+ phases: &mut HashMap<Revision, Phase>,
+ phase: Phase,
+ ) -> Revision {
+ let mut min_rev = NULL_REVISION;
+
+ for root in phase_roots {
+ phases.insert(*root, phase);
+ if min_rev == NULL_REVISION || min_rev > *root {
+ min_rev = *root;
+ }
+ }
+ min_rev
+ }
+
+ /// Return `(heads(::(<roots> and <roots>::<heads>)))`
+ /// If `include_path` is `true`, return `(<roots>::<heads>)`.
+ ///
+ /// `min_root` and `roots` are unchecked since they are just used as
+ /// a bound or for comparison and don't need to represent a valid revision.
+ /// In practice, the only invalid revision passed is the working directory
+ /// revision ([`i32::MAX`]).
+ pub fn reachable_roots(
+ &self,
+ min_root: UncheckedRevision,
+ mut heads: Vec<Revision>,
+ roots: HashSet<UncheckedRevision>,
+ include_path: bool,
+ ) -> Result<HashSet<Revision>, GraphError> {
+ if roots.is_empty() {
+ return Ok(HashSet::new());
+ }
+ let mut reachable = HashSet::new();
+ let mut seen = HashMap::new();
+
+ while let Some(rev) = heads.pop() {
+ if roots.contains(&rev.into()) {
+ reachable.insert(rev);
+ if !include_path {
+ continue;
+ }
+ }
+ let parents = self.parents(rev)?;
+ seen.insert(rev, parents);
+ for parent in parents {
+ if parent.0 >= min_root.0 && !seen.contains_key(&parent) {
+ heads.push(parent);
+ }
+ }
+ }
+ if !include_path {
+ return Ok(reachable);
+ }
+ let mut revs: Vec<_> = seen.keys().collect();
+ revs.sort_unstable();
+ for rev in revs {
+ for parent in seen[rev] {
+ if reachable.contains(&parent) {
+ reachable.insert(*rev);
+ }
+ }
+ }
+ Ok(reachable)
+ }
+
+ /// Given a (possibly overlapping) set of revs, return all the
+ /// common ancestor heads: `heads(::args[0] and ::args[1] and ...)`
+ pub fn common_ancestor_heads(
+ &self,
+ revisions: &[Revision],
+ ) -> Result<Vec<Revision>, GraphError> {
+ // given that `revisions` is expected to be small, we find this shortcut
+ // potentially acceptable, especially given that `hg-cpython` could
+ // very much bypass this, constructing a vector of unique values from
+ // the outset.
+ let as_set: HashSet<Revision> = revisions.iter().copied().collect();
+ // Besides deduplicating, the C version also implements the shortcut
+ // for `NULL_REVISION`:
+ if as_set.contains(&NULL_REVISION) {
+ return Ok(vec![]);
+ }
+
+ let revisions: Vec<Revision> = as_set.into_iter().collect();
+
+ if revisions.len() < 8 {
+ self.find_gca_candidates::<u8>(&revisions)
+ } else if revisions.len() < 64 {
+ self.find_gca_candidates::<u64>(&revisions)
+ } else {
+ self.find_gca_candidates::<NonStaticPoisonableBitSet>(&revisions)
+ }
+ }
+
+ pub fn ancestors(
+ &self,
+ revisions: &[Revision],
+ ) -> Result<Vec<Revision>, GraphError> {
+ self.find_deepest_revs(&self.common_ancestor_heads(revisions)?)
+ }
+
+ /// Given a disjoint set of revs, return all candidates for the
+ /// greatest common ancestor. In revset notation, this is the set
+ /// `heads(::a and ::b and ...)`
+ fn find_gca_candidates<BS: PoisonableBitSet + Clone>(
+ &self,
+ revs: &[Revision],
+ ) -> Result<Vec<Revision>, GraphError> {
+ if revs.is_empty() {
+ return Ok(vec![]);
+ }
+ let revcount = revs.len();
+ let mut candidates = vec![];
+ let max_rev = revs.iter().max().unwrap();
+
+ let mut seen = BS::vec_of_empty(revs.len(), (max_rev.0 + 1) as usize);
+
+ for (idx, rev) in revs.iter().enumerate() {
+ seen[rev.0 as usize].add(idx);
+ }
+ let mut current_rev = *max_rev;
+ // Number of revisions whose inspection in the main loop
+ // will give a result or trigger inspection of other revisions
+ let mut interesting = revcount;
+
+ // The algorithm works on a vector of bit sets, indexed by revision
+ // numbers and iterated in reverse order.
+ // An entry in this vector is poisoned if and only if the corresponding
+ // revision is a common, yet not maximal ancestor.
+
+ // The principle of the algorithm is as follows:
+ // For a revision `r`, when entering the loop, `seen[r]` is either
+ // poisoned or the subset of `revs` of which `r` is an ancestor.
+ // If this subset is full, then `r` is a solution and its parents
+ // have to be poisoned.
+ //
+ // At each iteration, the bit sets of the parents are updated by
+ // union with `seen[r]`.
+ // As we walk the index from the end, we are sure we have encountered
+ // all children of `r` before `r`, hence we know that `seen[r]` is
+ // fully computed.
+ //
+ // On top of that there are several optimizations that make reading
+ // less obvious than the comment above:
+ // - The `interesting` counter allows us to break early
+ // - The loop starts from `max(revs)`
+ // - Early return in case it is detected that one of the incoming revs
+ // is a common ancestor of all of them.
+ while current_rev.0 >= 0 && interesting > 0 {
+ let current_seen = seen[current_rev.0 as usize].clone();
+
+ if current_seen.is_empty() {
+ current_rev = Revision(current_rev.0 - 1);
+ continue;
+ }
+ let mut poison = current_seen.is_poisoned();
+ if !poison {
+ interesting -= 1;
+ if current_seen.is_full_range(revcount) {
+ candidates.push(current_rev);
+ poison = true;
+
+ // Being a common ancestor, if `current_rev` is among
+ // the input revisions, it is *the* answer.
+ for rev in revs {
+ if *rev == current_rev {
+ return Ok(candidates);
+ }
+ }
+ }
+ }
+ for parent in self.parents(current_rev)? {
+ if parent == NULL_REVISION {
+ continue;
+ }
+ let parent_seen = &mut seen[parent.0 as usize];
+ if poison {
+ // This block is logically equivalent to poisoning the parent
+ // and counting it as non-interesting if it
+ // has been seen before (and hence already counted as interesting)
+ if !parent_seen.is_empty() && !parent_seen.is_poisoned() {
+ interesting -= 1;
+ }
+ parent_seen.poison();
+ } else {
+ if parent_seen.is_empty() {
+ interesting += 1;
+ }
+ parent_seen.union(&current_seen);
+ }
+ }
+
+ current_rev = Revision(current_rev.0 - 1);
+ }
+
+ Ok(candidates)
+ }
+
+ /// Given a disjoint set of revs, return the subset with the longest path
+ /// to the root.
+ fn find_deepest_revs(
+ &self,
+ revs: &[Revision],
+ ) -> Result<Vec<Revision>, GraphError> {
+ // TODO replace this all with just comparing rank?
+ // Also, the original implementations in C/Python are cryptic, not
+ // even sure we actually need this?
+ if revs.len() <= 1 {
+ return Ok(revs.to_owned());
+ }
+ let max_rev = revs.iter().max().unwrap().0;
+ let mut interesting = HashMap::new();
+ let mut seen = vec![0; max_rev as usize + 1];
+ let mut depth = vec![0; max_rev as usize + 1];
+ let mut mapping = vec![];
+ let mut revs = revs.to_owned();
+ revs.sort_unstable();
+
+ for (idx, rev) in revs.iter().enumerate() {
+ depth[rev.0 as usize] = 1;
+ let shift = 1 << idx;
+ seen[rev.0 as usize] = shift;
+ interesting.insert(shift, 1);
+ mapping.push((shift, *rev));
+ }
+
+ let mut current_rev = Revision(max_rev);
+ while current_rev.0 >= 0 && interesting.len() > 1 {
+ let current_depth = depth[current_rev.0 as usize];
+ if current_depth == 0 {
+ current_rev = Revision(current_rev.0 - 1);
+ continue;
+ }
+
+ let current_seen = seen[current_rev.0 as usize];
+ for parent in self.parents(current_rev)?
{
+ if parent == NULL_REVISION {
+ continue;
+ }
+ let parent_seen = seen[parent.0 as usize];
+ let parent_depth = depth[parent.0 as usize];
+ if parent_depth <= current_depth {
+ depth[parent.0 as usize] = current_depth + 1;
+ if parent_seen != current_seen {
+ *interesting.get_mut(&current_seen).unwrap() += 1;
+ seen[parent.0 as usize] = current_seen;
+ if parent_seen != 0 {
+ let parent_interesting =
+ interesting.get_mut(&parent_seen).unwrap();
+ *parent_interesting -= 1;
+ if *parent_interesting == 0 {
+ interesting.remove(&parent_seen);
+ }
+ }
+ }
+ } else if current_depth == parent_depth - 1 {
+ let either_seen = parent_seen | current_seen;
+ if either_seen == parent_seen {
+ continue;
+ }
+ seen[parent.0 as usize] = either_seen;
+ interesting
+ .entry(either_seen)
+ .and_modify(|v| *v += 1)
+ .or_insert(1);
+ *interesting.get_mut(&parent_seen).unwrap() -= 1;
+ if interesting[&parent_seen] == 0 {
+ interesting.remove(&parent_seen);
+ }
+ }
+ }
+ *interesting.get_mut(&current_seen).unwrap() -= 1;
+ if interesting[&current_seen] == 0 {
+ interesting.remove(&current_seen);
+ }
+
+ current_rev = Revision(current_rev.0 - 1);
+ }
+
+ if interesting.len() != 1 {
+ return Ok(vec![]);
+ }
+ let mask = interesting.keys().next().unwrap();
+
+ Ok(mapping
+ .into_iter()
+ .filter_map(|(shift, rev)| {
+ if (mask & shift) != 0 {
+ return Some(rev);
+ }
+ None
+ })
+ .collect())
+ }
+}
+
+/// The kind of functionality needed by find_gca_candidates
+///
+/// This is a bit mask which can be declared to be "poisoned", which callers
+/// interpret to break out of some loops.
+///
+/// The maximum capacity of the bit mask is up to the actual implementation
+trait PoisonableBitSet: Sized + PartialEq {
+ /// Return a vector of exactly n elements, initialized to be empty.
+ ///
+ /// Optimization can vastly depend on implementation. Those being `Copy`
+ /// and having constant capacity typically can have a very simple
+ /// implementation.
+ fn vec_of_empty(sets_size: usize, vec_len: usize) -> Vec<Self>;
+
+ /// The size of the bit mask in memory
+ fn size(&self) -> usize;
+
+ /// The number of elements that can be represented in the set.
+ ///
+ /// Another way to put it is that it is the highest integer `C` such that
+ /// the set is guaranteed to always be a subset of the integer range
+ /// `[0, C)`
+ fn capacity(&self) -> usize;
+
+ /// Declare `n` to belong to the set
+ fn add(&mut self, n: usize);
+
+ /// Declare `n` not to belong to the set
+ fn discard(&mut self, n: usize);
+
+ /// Replace this bit set by its union with other
+ fn union(&mut self, other: &Self);
+
+ /// Poison the bit set
+ ///
+ /// Interpretation up to the caller
+ fn poison(&mut self);
+
+ /// Is the bit set poisoned?
+ ///
+ /// Interpretation is up to the caller
+ fn is_poisoned(&self) -> bool;
+
+ /// Is the bit set empty?
+ fn is_empty(&self) -> bool;
+
+ /// Return `true` if and only if the bit set is the full range `[0, n)`
+ /// of integers
+ fn is_full_range(&self, n: usize) -> bool;
+}
+
+const U64_POISON: u64 = 1 << 63;
+const U8_POISON: u8 = 1 << 7;
+
+impl PoisonableBitSet for u64 {
+ fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
+ vec![0u64; vec_len]
+ }
+
+ fn size(&self) -> usize {
+ 8
+ }
+
+ fn capacity(&self) -> usize {
+ 63
+ }
+
+ fn add(&mut self, n: usize) {
+ (*self) |= 1u64 << n;
+ }
+
+ fn discard(&mut self, n: usize) {
+ (*self) &= u64::MAX - (1u64 << n);
+ }
+
+ fn union(&mut self, other: &Self) {
+ if *self != *other {
+ (*self) |= *other;
+ }
+ }
+
+ fn is_full_range(&self, n: usize) -> bool {
+ *self + 1 == (1u64 << n)
+ }
+
+ fn is_empty(&self) -> bool {
+ *self == 0
+ }
+
+ fn poison(&mut self) {
+ *self = U64_POISON;
+ }
+
+ fn is_poisoned(&self) -> bool {
+ // equality comparison would be tempting but would not resist
+ // operations after poisoning (even if these should be bogus).
+ *self >= U64_POISON
+ }
+}
+
+impl PoisonableBitSet for u8 {
+ fn vec_of_empty(_sets_size: usize, vec_len: usize) -> Vec<Self> {
+ vec![0; vec_len]
+ }
+
+ fn size(&self) -> usize {
+ 1
+ }
+
+ fn capacity(&self) -> usize {
+ 7
+ }
+
+ fn add(&mut self, n: usize) {
+ (*self) |= 1 << n;
+ }
+
+ fn discard(&mut self, n: usize) {
+ (*self) &= u8::MAX - (1 << n);
+ }
+
+ fn union(&mut self, other: &Self) {
+ if *self != *other {
+ (*self) |= *other;
+ }
+ }
+
+ fn is_full_range(&self, n: usize) -> bool {
+ *self + 1 == (1 << n)
+ }
+
+ fn is_empty(&self) -> bool {
+ *self == 0
+ }
+
+ fn poison(&mut self) {
+ *self = U8_POISON;
+ }
+
+ fn is_poisoned(&self) -> bool {
+ // equality comparison would be tempting but would not resist
+ // operations after poisoning (even if these should be bogus).
+ *self >= U8_POISON
+ }
+}
+
+/// A poisonable bit set whose capacity is not known at compile time but
+/// is constant after initial construction
+///
+/// This can be way further optimized if performance assessments (speed
+/// and/or RAM) require it.
+/// As far as RAM is concerned, for large vectors of these, the main problem
+/// would be the repetition of set_size in each item. We would need a trait
+/// to abstract over the idea of a vector of such bit sets to do better.
+#[derive(Clone, PartialEq)]
+struct NonStaticPoisonableBitSet {
+ set_size: usize,
+ bit_set: Vec<u64>,
+}
+
+/// Number of `u64` needed for a [`NonStaticPoisonableBitSet`] of given size
+fn non_static_poisonable_inner_len(set_size: usize) -> usize {
+ 1 + (set_size + 1) / 64
+}
+
+impl NonStaticPoisonableBitSet {
+ /// The index of the sub-bit set for the given n, and the index inside
+ /// the latter
+ fn index(&self, n: usize) -> (usize, usize) {
+ (n / 64, n % 64)
+ }
+}
+
+/// Mock implementation to ensure that the trait makes sense
+impl PoisonableBitSet for NonStaticPoisonableBitSet {
+ fn vec_of_empty(set_size: usize, vec_len: usize) -> Vec<Self> {
+ let tmpl = Self {
+ set_size,
+ bit_set: vec![0u64; non_static_poisonable_inner_len(set_size)],
+ };
+ vec![tmpl; vec_len]
+ }
+
+ fn size(&self) -> usize {
+ 8 + self.bit_set.len() * 8
+ }
+
+ fn capacity(&self) -> usize {
+ self.set_size
+ }
+
+ fn add(&mut self, n: usize) {
+ let (sub_bs, bit_pos) = self.index(n);
+ self.bit_set[sub_bs] |= 1 << bit_pos
+ }
+
+ fn discard(&mut self, n: usize) {
+ let (sub_bs, bit_pos) = self.index(n);
+ self.bit_set[sub_bs] &= u64::MAX - (1 << bit_pos)
+ }
+
+ fn union(&mut self, other: &Self) {
+ assert!(
+ self.set_size == other.set_size,
+ "Binary operations on bit sets can only be done on same size"
+ );
+ for i in 0..self.bit_set.len() {
+ self.bit_set[i] |= other.bit_set[i]
+ }
+ }
+
+ fn is_full_range(&self, n: usize) -> bool {
+ let (sub_bs, bit_pos) = self.index(n);
+ self.bit_set[..sub_bs].iter().all(|bs| *bs == u64::MAX)
+ && self.bit_set[sub_bs] == (1 << bit_pos) - 1
+ }
+
+ fn is_empty(&self) -> bool {
+ self.bit_set.iter().all(|bs| *bs == 0u64)
+ }
+
+ fn poison(&mut self) {
+ let (sub_bs, bit_pos) = self.index(self.set_size);
+ self.bit_set[sub_bs] = 1 << bit_pos;
+ }
+
+ fn is_poisoned(&self) -> bool {
+ let (sub_bs, bit_pos) = self.index(self.set_size);
+ self.bit_set[sub_bs] >= 1 << bit_pos
+ }
+}
+
+/// Set of roots of all non-public phases
+pub type RootsPerPhase = [HashSet<Revision>; Phase::non_public_phases().len()];
+
+#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
+pub enum Phase {
+ Public = 0,
+ Draft = 1,
+ Secret = 2,
+ Archived = 3,
+ Internal = 4,
+}
+
+impl TryFrom<usize> for Phase {
+ type Error = RevlogError;
+
+ fn try_from(value: usize) -> Result<Self, Self::Error> {
+ Ok(match value {
+ 0 => Self::Public,
+ 1 => Self::Draft,
+ 2 => Self::Secret,
+ 32 => Self::Archived,
+ 96 => Self::Internal,
+ v => {
+ return Err(RevlogError::corrupted(format!(
+ "invalid phase value {}",
+ v
+ )))
+ }
+ })
+ }
+}
+
+impl Phase {
+ pub const fn all_phases() -> &'static [Self] {
+ &[
+ Self::Public,
+ Self::Draft,
+ Self::Secret,
+ Self::Archived,
+ Self::Internal,
+ ]
+ }
+ pub const fn non_public_phases() -> &'static [Self] {
+ &[Self::Draft, Self::Secret, Self::Archived, Self::Internal]
+ }
+}
+
+fn inline_scan(bytes: &[u8]) -> (usize, Vec<usize>) {
+ let mut offset: usize = 0;
+ let mut offsets = Vec::new();
+
+ while offset + INDEX_ENTRY_SIZE <= bytes.len() {
+ offsets.push(offset);
+ let end = offset + INDEX_ENTRY_SIZE;
+ let entry = IndexEntry {
+ bytes: &bytes[offset..end],
+ offset_override: None,
+ };
+
+ offset += INDEX_ENTRY_SIZE + entry.compressed_len() as usize;
+ }
+ (offset, offsets)
 }
 
 impl super::RevlogIndex for Index {
@@ -250,6 +1624,9 @@
 }
 
 fn node(&self, rev: Revision) -> Option<&Node> {
+ if rev == NULL_REVISION {
+ return Some(&NULL_NODE);
+ }
 self.get_entry(rev).map(|entry| entry.hash())
 }
 }
@@ -281,6 +1658,14 @@
BigEndian::read_u64(&bytes[..]) as usize } } + pub fn raw_offset(&self) -> u64 { + BigEndian::read_u64(&self.bytes[0..8]) + } + + /// Same result (except potentially for rev 0) as C `index_get_start()` + fn c_start(&self) -> u64 { + self.raw_offset() >> 16 + } pub fn flags(&self) -> u16 { BigEndian::read_u16(&self.bytes[6..=7]) @@ -323,6 +1708,10 @@ pub fn hash(&self) -> &'a Node { (&self.bytes[32..52]).try_into().unwrap() } + + pub fn as_bytes(&self) -> &'a [u8] { + self.bytes + } } #[cfg(test)] @@ -433,28 +1822,28 @@ pub fn build(&self) -> Vec<u8> { let mut bytes = Vec::with_capacity(INDEX_ENTRY_SIZE); if self.is_first { - bytes.extend(&match (self.is_general_delta, self.is_inline) { + bytes.extend(match (self.is_general_delta, self.is_inline) { (false, false) => [0u8, 0], (false, true) => [0u8, 1], (true, false) => [0u8, 2], (true, true) => [0u8, 3], }); - bytes.extend(&self.version.to_be_bytes()); + bytes.extend(self.version.to_be_bytes()); // Remaining offset bytes. - bytes.extend(&[0u8; 2]); + bytes.extend([0u8; 2]); } else { // Offset stored on 48 bits (6 bytes) bytes.extend(&(self.offset as u64).to_be_bytes()[2..]); } - bytes.extend(&[0u8; 2]); // Revision flags. - bytes.extend(&(self.compressed_len as u32).to_be_bytes()); - bytes.extend(&(self.uncompressed_len as u32).to_be_bytes()); + bytes.extend([0u8; 2]); // Revision flags. + bytes.extend((self.compressed_len as u32).to_be_bytes()); + bytes.extend((self.uncompressed_len as u32).to_be_bytes()); bytes.extend( - &self.base_revision_or_base_of_delta_chain.0.to_be_bytes(), + self.base_revision_or_base_of_delta_chain.0.to_be_bytes(), ); - bytes.extend(&self.link_revision.0.to_be_bytes()); - bytes.extend(&self.p1.0.to_be_bytes()); - bytes.extend(&self.p2.0.to_be_bytes()); + bytes.extend(self.link_revision.0.to_be_bytes()); + bytes.extend(self.p1.0.to_be_bytes()); + bytes.extend(self.p2.0.to_be_bytes()); bytes.extend(self.node.as_bytes()); bytes.extend(vec![0u8; 12]); bytes @@ -464,6 +1853,7 @@ pub fn is_inline(index_bytes: &[u8]) -> bool { IndexHeader::parse(index_bytes) .expect("too short") + .unwrap() .format_flags() .is_inline() } @@ -471,6 +1861,7 @@ pub fn uses_generaldelta(index_bytes: &[u8]) -> bool { IndexHeader::parse(index_bytes) .expect("too short") + .unwrap() .format_flags() .uses_generaldelta() } @@ -478,6 +1869,7 @@ pub fn get_version(index_bytes: &[u8]) -> u16 { IndexHeader::parse(index_bytes) .expect("too short") + .unwrap() .format_version() }
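The PoisonableBitSet machinery introduced above is easiest to see on the fixed-width variants. A standalone sketch (illustrative only, reusing the bit conventions of the u8 implementation, not code from the patch) of how find_gca_candidates uses it with two input revisions:

    const U8_POISON: u8 = 1 << 7;

    fn main() {
        // seen[r]: which of the input revisions have r as an ancestor.
        let mut seen: Vec<u8> = vec![0; 4];
        seen[3] |= 1 << 0; // rev 3 reaches input rev #0...
        seen[3] |= 1 << 1; // ...and input rev #1
        // is_full_range(2): rev 3 reaches both inputs, so it is a candidate.
        assert!(seen[3] + 1 == 1 << 2);
        // Its parents then get poisoned: common ancestors, but not maximal.
        seen[2] = U8_POISON; // poison()
        assert!(seen[2] >= U8_POISON); // is_poisoned()
    }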
--- a/rust/hg-core/src/revlog/manifest.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/revlog/manifest.rs Mon Feb 12 16:22:47 2024 +0100 @@ -4,12 +4,14 @@ use crate::utils::hg_path::HgPath; use crate::utils::SliceExt; use crate::vfs::Vfs; -use crate::{Graph, GraphError, Revision, UncheckedRevision}; +use crate::{ + Graph, GraphError, Revision, RevlogOpenOptions, UncheckedRevision, +}; /// A specialized `Revlog` to work with `manifest` data format. pub struct Manifestlog { /// The generic `revlog` format. - revlog: Revlog, + pub(crate) revlog: Revlog, } impl Graph for Manifestlog { @@ -20,9 +22,11 @@ impl Manifestlog { /// Open the `manifest` of a repository given by its root. - pub fn open(store_vfs: &Vfs, use_nodemap: bool) -> Result<Self, HgError> { - let revlog = - Revlog::open(store_vfs, "00manifest.i", None, use_nodemap)?; + pub fn open( + store_vfs: &Vfs, + options: RevlogOpenOptions, + ) -> Result<Self, HgError> { + let revlog = Revlog::open(store_vfs, "00manifest.i", None, options)?; Ok(Self { revlog }) }
--- a/rust/hg-core/src/revlog/mod.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/revlog/mod.rs Mon Feb 12 16:22:47 2024 +0100 @@ -148,7 +148,10 @@ fn node(&self, rev: Revision) -> Option<&Node>; /// Return a [`Revision`] if `rev` is a valid revision number for this - /// index + /// index. + /// + /// [`NULL_REVISION`] is considered to be valid. + #[inline(always)] fn check_revision(&self, rev: UncheckedRevision) -> Option<Revision> { let rev = rev.0; @@ -225,6 +228,55 @@ } } +#[derive(Debug, Copy, Clone)] +pub enum RevlogVersionOptions { + V0, + V1 { generaldelta: bool }, + V2, + ChangelogV2 { compute_rank: bool }, +} + +/// Options to govern how a revlog should be opened, usually from the +/// repository configuration or requirements. +#[derive(Debug, Copy, Clone)] +pub struct RevlogOpenOptions { + /// The revlog version, along with any option specific to this version + pub version: RevlogVersionOptions, + /// Whether the revlog uses a persistent nodemap. + pub use_nodemap: bool, + // TODO other non-header/version options, +} + +impl RevlogOpenOptions { + pub fn new() -> Self { + Self { + version: RevlogVersionOptions::V1 { generaldelta: true }, + use_nodemap: false, + } + } + + fn default_index_header(&self) -> index::IndexHeader { + index::IndexHeader { + header_bytes: match self.version { + RevlogVersionOptions::V0 => [0, 0, 0, 0], + RevlogVersionOptions::V1 { generaldelta } => { + [0, if generaldelta { 3 } else { 1 }, 0, 1] + } + RevlogVersionOptions::V2 => 0xDEADu32.to_be_bytes(), + RevlogVersionOptions::ChangelogV2 { compute_rank: _ } => { + 0xD34Du32.to_be_bytes() + } + }, + } + } +} + +impl Default for RevlogOpenOptions { + fn default() -> Self { + Self::new() + } +} + impl Revlog { /// Open a revlog index file. /// @@ -234,24 +286,30 @@ store_vfs: &Vfs, index_path: impl AsRef<Path>, data_path: Option<&Path>, - use_nodemap: bool, + options: RevlogOpenOptions, ) -> Result<Self, HgError> { - Self::open_gen(store_vfs, index_path, data_path, use_nodemap, None) + Self::open_gen(store_vfs, index_path, data_path, options, None) } fn open_gen( store_vfs: &Vfs, index_path: impl AsRef<Path>, data_path: Option<&Path>, - use_nodemap: bool, + options: RevlogOpenOptions, nodemap_for_test: Option<nodemap::NodeTree>, ) -> Result<Self, HgError> { let index_path = index_path.as_ref(); let index = { match store_vfs.mmap_open_opt(index_path)? 
{ - None => Index::new(Box::<Vec<_>>::default()), + None => Index::new( + Box::<Vec<_>>::default(), + options.default_index_header(), + ), Some(index_mmap) => { - let index = Index::new(Box::new(index_mmap))?; + let index = Index::new( + Box::new(index_mmap), + options.default_index_header(), + )?; Ok(index) } } @@ -270,7 +328,7 @@ Some(Box::new(data_mmap)) }; - let nodemap = if index.is_inline() || !use_nodemap { + let nodemap = if index.is_inline() || !options.use_nodemap { None } else { NodeMapDocket::read_from_file(store_vfs, index_path)?.map( @@ -809,7 +867,9 @@ let temp = tempfile::tempdir().unwrap(); let vfs = Vfs { base: temp.path() }; std::fs::write(temp.path().join("foo.i"), b"").unwrap(); - let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); + let revlog = + Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new()) + .unwrap(); assert!(revlog.is_empty()); assert_eq!(revlog.len(), 0); assert!(revlog.get_entry(0.into()).is_err()); @@ -855,7 +915,9 @@ .flatten() .collect_vec(); std::fs::write(temp.path().join("foo.i"), contents).unwrap(); - let revlog = Revlog::open(&vfs, "foo.i", None, false).unwrap(); + let revlog = + Revlog::open(&vfs, "foo.i", None, RevlogOpenOptions::new()) + .unwrap(); let entry0 = revlog.get_entry(0.into()).ok().unwrap(); assert_eq!(entry0.revision(), Revision(0)); @@ -926,8 +988,14 @@ idx.insert_node(Revision(0), node0).unwrap(); idx.insert_node(Revision(1), node1).unwrap(); - let revlog = - Revlog::open_gen(&vfs, "foo.i", None, true, Some(idx.nt)).unwrap(); + let revlog = Revlog::open_gen( + &vfs, + "foo.i", + None, + RevlogOpenOptions::new(), + Some(idx.nt), + ) + .unwrap(); // accessing the data shows the corruption revlog.get_entry(0.into()).unwrap().data().unwrap_err();
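The default_index_header bytes above round-trip through IndexHeader::parse: the first two big-endian bytes carry the format flags (1 = inline, 2 = generaldelta, per revlog v1 conventions) and the last two the version. An illustrative standalone check for the V1-generaldelta default:

    fn main() {
        let header_bytes: [u8; 4] = [0, 3, 0, 1]; // V1 { generaldelta: true }
        let flags = u16::from_be_bytes([header_bytes[0], header_bytes[1]]);
        let version = u16::from_be_bytes([header_bytes[2], header_bytes[3]]);
        assert_eq!(version, 1); // REVLOGV1
        assert_eq!(flags & 1, 1); // inline data
        assert_eq!(flags & 2, 2); // generaldelta
    }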
--- a/rust/hg-core/src/revlog/node.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/revlog/node.rs Mon Feb 12 16:22:47 2024 +0100 @@ -20,6 +20,10 @@ /// the future. pub const NODE_BYTES_LENGTH: usize = 20; +/// The length in bytes set aside on disk for a `Node`. Revlog up to v1 only +/// use 20 out of those 32. +pub const STORED_NODE_ID_BYTES: usize = 32; + /// Id of the null node. /// /// Used to indicate the absence of node.
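This is the constant that lets RevisionDataParams::into_v1 (in the index.rs hunk above) widen a 20-byte node id into its 32-byte on-disk slot, zero-padding the tail. A standalone illustration:

    const NODE_BYTES_LENGTH: usize = 20;
    const STORED_NODE_ID_BYTES: usize = 32;

    fn main() {
        let node_id = [0xabu8; NODE_BYTES_LENGTH];
        let mut stored = [0u8; STORED_NODE_ID_BYTES];
        stored[..NODE_BYTES_LENGTH].copy_from_slice(&node_id);
        // Revlog v1 only uses 20 of the 32 bytes; the rest stays zero.
        assert_eq!(&stored[NODE_BYTES_LENGTH..], &[0u8; 12][..]);
    }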
--- a/rust/hg-core/src/utils.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-core/src/utils.rs Mon Feb 12 16:22:47 2024 +0100 @@ -11,7 +11,10 @@ use crate::utils::hg_path::HgPath; use im_rc::ordmap::DiffItem; use im_rc::ordmap::OrdMap; +use itertools::EitherOrBoth; +use itertools::Itertools; use std::cell::Cell; +use std::cmp::Ordering; use std::fmt; use std::{io::Write, ops::Deref}; @@ -499,6 +502,43 @@ }) } +/// Like `itertools::merge_join_by`, but merges fallible iterators. +/// +/// The callback is only used for Ok values. Errors are passed through as-is. +/// Errors compare less than Ok values, which makes the error handling +/// conservative. +pub fn merge_join_results_by<'a, I1, I2, F, A, B, E>( + iter1: I1, + iter2: I2, + f: F, +) -> impl Iterator<Item = Result<EitherOrBoth<A, B>, E>> + 'a +where + I1: Iterator<Item = Result<A, E>> + 'a, + I2: Iterator<Item = Result<B, E>> + 'a, + F: FnMut(&A, &B) -> Ordering + 'a, +{ + let mut g = f; + iter1 + .merge_join_by(iter2, move |i1, i2| match i1 { + Err(_) => Ordering::Less, + Ok(i1) => match i2 { + Err(_) => Ordering::Greater, + Ok(i2) => g(i1, i2), + }, + }) + .map(|result| match result { + EitherOrBoth::Left(Err(e)) => Err(e), + EitherOrBoth::Right(Err(e)) => Err(e), + EitherOrBoth::Both(Err(e), _) => Err(e), + EitherOrBoth::Both(_, Err(e)) => Err(e), + EitherOrBoth::Left(Ok(v)) => Ok(EitherOrBoth::Left(v)), + EitherOrBoth::Right(Ok(v)) => Ok(EitherOrBoth::Right(v)), + EitherOrBoth::Both(Ok(v1), Ok(v2)) => { + Ok(EitherOrBoth::Both(v1, v2)) + } + }) +} + /// Force the global rayon threadpool to not exceed 16 concurrent threads /// unless the user has specified a value. /// This is a stop-gap measure until we figure out why using more than 16
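A hypothetical usage sketch for merge_join_results_by, merging two sorted fallible streams (the values are invented; assumes the hg and itertools crates as dependencies). Because errors compare ahead of Ok values, they surface at the point they occur in the merged order:

    use itertools::EitherOrBoth;

    fn main() {
        let left = vec![Ok::<i32, String>(1), Ok(3)].into_iter();
        let right =
            vec![Ok::<i32, String>(2), Ok(3), Err("io".to_string())].into_iter();
        let merged: Vec<_> =
            hg::utils::merge_join_results_by(left, right, |a, b| a.cmp(b))
                .collect();
        assert_eq!(merged[0], Ok(EitherOrBoth::Left(1)));
        assert_eq!(merged[1], Ok(EitherOrBoth::Right(2)));
        assert_eq!(merged[2], Ok(EitherOrBoth::Both(3, 3)));
        assert!(merged[3].is_err());
    }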
--- a/rust/hg-cpython/src/ancestors.rs Mon Feb 12 16:17:08 2024 +0100
+++ b/rust/hg-cpython/src/ancestors.rs Mon Feb 12 16:22:47 2024 +0100
@@ -34,15 +34,17 @@
 //! [`LazyAncestors`]: struct.LazyAncestors.html
 //! [`MissingAncestors`]: struct.MissingAncestors.html
 //! [`AncestorsIterator`]: struct.AncestorsIterator.html
-use crate::revlog::pyindex_to_graph;
+use crate::revlog::py_rust_index_to_graph;
 use crate::PyRevision;
 use crate::{
- cindex::Index, conversion::rev_pyiter_collect, exceptions::GraphError,
+ conversion::rev_pyiter_collect, exceptions::GraphError,
+ revlog::PySharedIndex,
 };
 use cpython::{
- ObjectProtocol, PyClone, PyDict, PyList, PyModule, PyObject, PyResult,
- Python, PythonObject, ToPyObject,
+ ObjectProtocol, PyClone, PyDict, PyErr, PyList, PyModule, PyObject,
+ PyResult, Python, PythonObject, ToPyObject, UnsafePyLeaked,
 };
+
 use hg::MissingAncestors as CoreMissing;
 use hg::Revision;
 use std::cell::RefCell;
@@ -52,11 +54,46 @@
 LazyAncestors as VCGLazyAncestors,
 };
 
+// Error propagation for an [`UnsafePyLeaked`] wrapping a [`Result`]
+//
+// It would be nice for UnsafePyLeaked to provide this directly as a variant
+// of the `map` method with a signature such as:
+//
+// ```
+// unsafe fn map_or_err(py: Python,
+// f: impl FnOnce(T) -> Result<U, E>,
+// convert_err: impl FnOnce(Python, E) -> PyErr)
+// ```
+//
+// This would spare users of the `cpython` crate the additional `unsafe` deref
+// to inspect the error and return it outside `UnsafePyLeaked`, and the
+// subsequent unwrapping that this function performs.
+fn pyleaked_or_map_err<T, E: std::fmt::Debug + Copy>(
+ py: Python,
+ leaked: UnsafePyLeaked<Result<T, E>>,
+ convert_err: impl FnOnce(Python, E) -> PyErr,
+) -> PyResult<UnsafePyLeaked<T>> {
+ // Result.inspect_err is unstable in Rust 1.61
+ // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+ if let Err(e) = *unsafe { leaked.try_borrow(py)? } {
+ return Err(convert_err(py, e));
+ }
+ // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+ Ok(unsafe {
+ leaked.map(py, |res| {
+ res.expect("Error case should have already been treated")
+ })
+ })
+}
+
 py_class!(pub class AncestorsIterator |py| {
- data inner: RefCell<Box<VCGAncestorsIterator<Index>>>;
+ data inner: RefCell<UnsafePyLeaked<VCGAncestorsIterator<PySharedIndex>>>;
 
 def __next__(&self) -> PyResult<Option<PyRevision>> {
- match self.inner(py).borrow_mut().next() {
+ let mut leaked = self.inner(py).borrow_mut();
+ // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+ let mut inner = unsafe { leaked.try_borrow_mut(py)? };
+ match inner.next() {
 Some(Err(e)) => Err(GraphError::pynew_from_vcsgraph(py, e)),
 None => Ok(None),
 Some(Ok(r)) => Ok(Some(PyRevision(r))),
@@ -64,7 +101,10 @@
 }
 
 def __contains__(&self, rev: PyRevision) -> PyResult<bool> {
- self.inner(py).borrow_mut().contains(rev.0)
+ let mut leaked = self.inner(py).borrow_mut();
+ // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+ let mut inner = unsafe { leaked.try_borrow_mut(py)?
}; + inner.contains(rev.0) .map_err(|e| GraphError::pynew_from_vcsgraph(py, e)) } @@ -79,16 +119,7 @@ stoprev: PyRevision, inclusive: bool ) -> PyResult<AncestorsIterator> { - let index = pyindex_to_graph(py, index)?; - let initvec: Vec<_> = rev_pyiter_collect(py, &initrevs, &index)?; - let ait = VCGAncestorsIterator::new( - index, - initvec.into_iter().map(|r| r.0), - stoprev.0, - inclusive, - ) - .map_err(|e| GraphError::pynew_from_vcsgraph(py, e))?; - AncestorsIterator::from_inner(py, ait) + Self::inner_new(py, index, initrevs, stoprev, inclusive) } }); @@ -96,28 +127,75 @@ impl AncestorsIterator { pub fn from_inner( py: Python, - ait: VCGAncestorsIterator<Index>, + ait: UnsafePyLeaked<VCGAncestorsIterator<PySharedIndex>>, ) -> PyResult<Self> { - Self::create_instance(py, RefCell::new(Box::new(ait))) + Self::create_instance(py, RefCell::new(ait)) + } + + pub fn inner_new( + py: Python, + index: PyObject, + initrevs: PyObject, + stoprev: PyRevision, + inclusive: bool, + ) -> PyResult<AncestorsIterator> { + let index = py_rust_index_to_graph(py, index)?; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let initvec: Vec<_> = { + let borrowed_idx = unsafe { index.try_borrow(py)? }; + rev_pyiter_collect(py, &initrevs, &*borrowed_idx)? + }; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let res_ait = unsafe { + index.map(py, |idx| { + VCGAncestorsIterator::new( + idx, + initvec.into_iter().map(|r| r.0), + stoprev.0, + inclusive, + ) + }) + }; + let ait = + pyleaked_or_map_err(py, res_ait, GraphError::pynew_from_vcsgraph)?; + AncestorsIterator::from_inner(py, ait) } } py_class!(pub class LazyAncestors |py| { - data inner: RefCell<Box<VCGLazyAncestors<Index>>>; + data inner: RefCell<UnsafePyLeaked< + RefCell<VCGLazyAncestors<PySharedIndex>> + >>; + data index: PyObject; + data initrevs: PyObject; + data stoprev: PyRevision; + data inclusive: bool; def __contains__(&self, rev: PyRevision) -> PyResult<bool> { - self.inner(py) - .borrow_mut() - .contains(rev.0) + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner: &RefCell<VCGLazyAncestors<PySharedIndex>> = + &*unsafe { leaked.try_borrow(py)? }; + let inner_mut: &mut VCGLazyAncestors<PySharedIndex> = + &mut inner.borrow_mut(); + inner_mut.contains(rev.0) .map_err(|e| GraphError::pynew_from_vcsgraph(py, e)) } def __iter__(&self) -> PyResult<AncestorsIterator> { - AncestorsIterator::from_inner(py, self.inner(py).borrow().iter()) + let index = self.index(py).clone_ref(py); + let initrevs = self.initrevs(py).clone_ref(py); + AncestorsIterator::inner_new(py, index, initrevs, + *self.stoprev(py), + *self.inclusive(py)) } def __bool__(&self) -> PyResult<bool> { - Ok(!self.inner(py).borrow().is_empty()) + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner = unsafe { leaked.try_borrow(py)? }; + let empty = inner.borrow().is_empty(); + Ok(!empty) } def __new__( @@ -127,26 +205,39 @@ stoprev: PyRevision, inclusive: bool ) -> PyResult<Self> { - let index = pyindex_to_graph(py, index)?; - let initvec: Vec<_> = rev_pyiter_collect(py, &initrevs, &index)?; + let cloned_index = index.clone_ref(py); + let index = py_rust_index_to_graph(py, index)?; + let initvec: Vec<_> = { + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let borrowed_idx = unsafe {index.try_borrow(py)?}; + rev_pyiter_collect(py, &initrevs, &*borrowed_idx)? 
+ }; - let lazy = - VCGLazyAncestors::new( - index, + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let res_lazy = + unsafe { index.map(py, |idx| VCGLazyAncestors::new( + idx, initvec.into_iter().map(|r| r.0), stoprev.0, inclusive - ) - .map_err(|e| GraphError::pynew_from_vcsgraph(py, e))?; - - Self::create_instance(py, RefCell::new(Box::new(lazy))) + ))}; + let lazy = pyleaked_or_map_err(py, res_lazy, + GraphError::pynew_from_vcsgraph)?; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let lazy_cell = unsafe { lazy.map(py, RefCell::new)}; + let res = Self::create_instance( + py, RefCell::new(lazy_cell), + cloned_index, initrevs, stoprev, inclusive)?; + Ok(res) } }); py_class!(pub class MissingAncestors |py| { - data inner: RefCell<Box<CoreMissing<Index>>>; - data index: RefCell<Index>; + data inner: RefCell<UnsafePyLeaked< + CoreMissing<PySharedIndex> + >>; + data index: PyObject; def __new__( _cls, @@ -154,25 +245,47 @@ bases: PyObject ) -> PyResult<MissingAncestors> { - let index = pyindex_to_graph(py, index)?; - let bases_vec: Vec<_> = rev_pyiter_collect(py, &bases, &index)?; + let cloned_index = index.clone_ref(py); + let inner_index = py_rust_index_to_graph(py, index)?; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let bases_vec: Vec<_> = { + let borrowed_idx = unsafe { inner_index.try_borrow(py)? }; + rev_pyiter_collect(py, &bases, &*borrowed_idx)? + }; - let inner = CoreMissing::new(index.clone_ref(py), bases_vec); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner = unsafe { + inner_index.map(py, |idx| CoreMissing::new(idx, bases_vec)) + }; MissingAncestors::create_instance( py, - RefCell::new(Box::new(inner)), - RefCell::new(index) + RefCell::new(inner), + cloned_index, ) } def hasbases(&self) -> PyResult<bool> { - Ok(self.inner(py).borrow().has_bases()) + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner: &CoreMissing<PySharedIndex> = + &*unsafe { leaked.try_borrow(py)? }; + Ok(inner.has_bases()) } def addbases(&self, bases: PyObject) -> PyResult<PyObject> { - let index = self.index(py).borrow(); - let bases_vec: Vec<_> = rev_pyiter_collect(py, &bases, &*index)?; - let mut inner = self.inner(py).borrow_mut(); + let bases_vec: Vec<_> = { + let leaked = py_rust_index_to_graph(py, + self.index(py).clone_ref(py))?; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let index = &*unsafe { leaked.try_borrow(py)? }; + rev_pyiter_collect(py, &bases, index)? + }; + + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner: &mut CoreMissing<PySharedIndex> = + &mut *unsafe { leaked.try_borrow_mut(py)? }; + inner.add_bases(bases_vec); // cpython doc has examples with PyResult<()> but this gives me // the trait `cpython::ToPyObject` is not implemented for `()` @@ -181,18 +294,22 @@ } def bases(&self) -> PyResult<HashSet<PyRevision>> { - Ok( - self.inner(py) - .borrow() - .get_bases() - .iter() - .map(|r| PyRevision(r.0)) - .collect() + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner: &CoreMissing<PySharedIndex> = + &*unsafe { leaked.try_borrow(py)? 
}; + Ok(inner.get_bases() + .iter() + .map(|r| PyRevision(r.0)) + .collect() ) } def basesheads(&self) -> PyResult<HashSet<PyRevision>> { - let inner = self.inner(py).borrow(); + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner: &CoreMissing<PySharedIndex> = + &*unsafe { leaked.try_borrow(py)? }; Ok( inner .bases_heads() @@ -204,19 +321,28 @@ } def removeancestorsfrom(&self, revs: PyObject) -> PyResult<PyObject> { - let index = self.index(py).borrow(); - // this is very lame: we convert to a Rust set, update it in place - // and then convert back to Python, only to have Python remove the - // excess (thankfully, Python is happy with a list or even an iterator) - // Leads to improve this: - // - have the CoreMissing instead do something emit revisions to - // discard - // - define a trait for sets of revisions in the core and implement - // it for a Python set rewrapped with the GIL marker - let mut revs_pyset: HashSet<Revision> = rev_pyiter_collect( - py, &revs, &*index - )?; - let mut inner = self.inner(py).borrow_mut(); + let mut revs_pyset: HashSet<Revision> = { + // this is very lame: we convert to a Rust set, update it in place + // and then convert back to Python, only to have Python remove the + // excess (thankfully, Python is happy with a list or even an + // iterator) + // Leads to improve this: + // - have the CoreMissing instead do something emit revisions to + // discard + // - define a trait for sets of revisions in the core and + // implement it for a Python set rewrapped with the GIL marker + let leaked = py_rust_index_to_graph(py, + self.index(py).clone_ref(py))?; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let index = &*unsafe { leaked.try_borrow(py)? }; + rev_pyiter_collect(py, &revs, index)? + }; + + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner: &mut CoreMissing<PySharedIndex> = + &mut *unsafe { leaked.try_borrow_mut(py)? }; + inner.remove_ancestors_from(&mut revs_pyset) .map_err(|e| GraphError::pynew(py, e))?; @@ -233,10 +359,19 @@ } def missingancestors(&self, revs: PyObject) -> PyResult<PyList> { - let index = self.index(py).borrow(); - let revs_vec: Vec<Revision> = rev_pyiter_collect(py, &revs, &*index)?; + let revs_vec: Vec<Revision> = { + let leaked = py_rust_index_to_graph(py, + self.index(py).clone_ref(py))?; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let index = &*unsafe { leaked.try_borrow(py)? }; + rev_pyiter_collect(py, &revs, index)? + }; - let mut inner = self.inner(py).borrow_mut(); + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner: &mut CoreMissing<PySharedIndex> = + &mut *unsafe { leaked.try_borrow_mut(py)? }; + let missing_vec = match inner.missing_ancestors(revs_vec) { Ok(missing) => missing, Err(e) => {
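All three classes above (`AncestorsIterator`, `LazyAncestors`, `MissingAncestors`) now follow the same borrow discipline for the shared index. A minimal sketch of the pattern, assuming the in-crate helpers `py_rust_index_to_graph` and `rev_pyiter_collect` from this changeset; the function itself is illustrative, not part of the patch:

    use cpython::{PyObject, PyResult, Python};
    use hg::Revision;

    // Illustrative helper: borrow the leaked index only inside a scope,
    // so the "faked" 'static reference never escapes `UnsafePyLeaked`.
    fn collect_revs(
        py: Python,
        index: PyObject,
        revs: PyObject,
    ) -> PyResult<Vec<Revision>> {
        let leaked = py_rust_index_to_graph(py, index)?;
        let collected: Vec<_> = {
            // Safety: the borrow is dropped at the end of this block
            let borrowed = unsafe { leaked.try_borrow(py)? };
            rev_pyiter_collect(py, &revs, &*borrowed)?
        };
        Ok(collected)
    }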
--- a/rust/hg-cpython/src/cindex.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-cpython/src/cindex.rs Mon Feb 12 16:22:47 2024 +0100 @@ -9,7 +9,7 @@ //! //! Ideally, we should use an Index entirely implemented in Rust, //! but this will take some time to get there. - +#![allow(dead_code)] use cpython::{ exc::ImportError, exc::TypeError, ObjectProtocol, PyClone, PyErr, PyObject, PyResult, PyTuple, Python, PythonObject,
--- a/rust/hg-cpython/src/conversion.rs	Mon Feb 12 16:17:08 2024 +0100
+++ b/rust/hg-cpython/src/conversion.rs	Mon Feb 12 16:22:47 2024 +0100
@@ -28,17 +28,28 @@
     C: FromIterator<Revision>,
     I: RevlogIndex,
 {
+    rev_pyiter_collect_or_else(py, revs, index, |r| {
+        PyErr::new::<GraphError, _>(py, ("InvalidRevision", r.0))
+    })
+}
+
+/// Same as [`rev_pyiter_collect`], giving control over returned errors
+pub fn rev_pyiter_collect_or_else<C, I>(
+    py: Python,
+    revs: &PyObject,
+    index: &I,
+    invalid_rev_error: impl FnOnce(PyRevision) -> PyErr + Copy,
+) -> PyResult<C>
+where
+    C: FromIterator<Revision>,
+    I: RevlogIndex,
+{
     revs.iter(py)?
         .map(|r| {
             r.and_then(|o| match o.extract::<PyRevision>(py) {
                 Ok(r) => index
                     .check_revision(UncheckedRevision(r.0))
-                    .ok_or_else(|| {
-                        PyErr::new::<GraphError, _>(
-                            py,
-                            ("InvalidRevision", r.0),
-                        )
-                    }),
+                    .ok_or_else(|| invalid_rev_error(r)),
                 Err(e) => Err(e),
             })
         })
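Callers that need an exception other than the default `GraphError` pass the error constructor as a closure. A short usage sketch, modeled on the `reachableroots2` call site later in this changeset (`index` is any `&impl RevlogIndex` and `heads` a Python iterable, both assumed from context):

    use cpython::{exc::IndexError, PyErr};
    use hg::Revision;

    let heads: Vec<Revision> =
        rev_pyiter_collect_or_else(py, &heads, index, |_rev| {
            PyErr::new::<IndexError, _>(py, "head out of range")
        })?;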
--- a/rust/hg-cpython/src/dagops.rs	Mon Feb 12 16:17:08 2024 +0100
+++ b/rust/hg-cpython/src/dagops.rs	Mon Feb 12 16:22:47 2024 +0100
@@ -15,10 +15,9 @@
 use hg::dagops;
 use hg::Revision;
 use std::collections::HashSet;
-use vcsgraph::ancestors::node_rank;
-use vcsgraph::graph::{Parents, Rank};
+use vcsgraph::graph::Rank;
 
-use crate::revlog::pyindex_to_graph;
+use crate::revlog::py_rust_index_to_graph;
 
 /// Using the `index`, return heads out of any Python iterable of Revisions
 ///
@@ -28,23 +27,34 @@
     index: PyObject,
     revs: PyObject,
 ) -> PyResult<HashSet<PyRevision>> {
-    let index = pyindex_to_graph(py, index)?;
-    let mut as_set: HashSet<Revision> = rev_pyiter_collect(py, &revs, &index)?;
-    dagops::retain_heads(&index, &mut as_set)
+    let py_leaked = py_rust_index_to_graph(py, index)?;
+    // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
+    let index = &*unsafe { py_leaked.try_borrow(py)? };
+    let mut as_set: HashSet<Revision> = rev_pyiter_collect(py, &revs, index)?;
+    dagops::retain_heads(index, &mut as_set)
         .map_err(|e| GraphError::pynew(py, e))?;
     Ok(as_set.into_iter().map(Into::into).collect())
 }
 
 /// Computes the rank, i.e. the number of ancestors including itself,
 /// of a node represented by its parents.
+///
+/// Currently, the pure Rust index supports only the REVLOGV1 format, hence
+/// the only possible return value is that the rank is unknown.
+///
+/// References:
+/// - C implementation, function `index_fast_rank()`.
+/// - `impl vcsgraph::graph::RankedGraph for Index` in `crate::cindex`.
 pub fn rank(
     py: Python,
-    index: PyObject,
-    p1r: PyRevision,
-    p2r: PyRevision,
+    _index: PyObject,
+    _p1r: PyRevision,
+    _p2r: PyRevision,
 ) -> PyResult<Rank> {
-    node_rank(&pyindex_to_graph(py, index)?, &Parents([p1r.0, p2r.0]))
-        .map_err(|e| GraphError::pynew_from_vcsgraph(py, e))
+    Err(GraphError::pynew_from_vcsgraph(
+        py,
+        vcsgraph::graph::GraphReadError::InconsistentGraphData,
+    ))
 }
 
 /// Create the module, with `__package__` given from parent
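To make the `Graph` bound concrete, here is a toy graph, not part of the patch, that `dagops::retain_heads` can prune; in a contiguous set like the one below, every non-head is a parent of another member and gets removed:

    use std::collections::HashSet;
    use hg::revlog::Graph;
    use hg::{dagops, GraphError, Revision};

    /// Toy linear history: 0 <- 1 <- 2 (illustrative only).
    struct Line;

    impl Graph for Line {
        fn parents(&self, rev: Revision) -> Result<[Revision; 2], GraphError> {
            match rev.0 {
                0 => Ok([Revision(-1), Revision(-1)]), // -1 is NULL_REVISION
                1 | 2 => Ok([Revision(rev.0 - 1), Revision(-1)]),
                _ => Err(GraphError::ParentOutOfRange(rev)),
            }
        }
    }

    fn demo() {
        let mut revs: HashSet<Revision> =
            HashSet::from([Revision(0), Revision(1), Revision(2)]);
        dagops::retain_heads(&Line, &mut revs).unwrap();
        // 0 and 1 are parents of other members of the set; the head stays.
        assert_eq!(revs, HashSet::from([Revision(2)]));
    }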
--- a/rust/hg-cpython/src/discovery.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-cpython/src/discovery.rs Mon Feb 12 16:22:47 2024 +0100 @@ -9,16 +9,17 @@ //! `hg-core` crate. From Python, this will be seen as `rustext.discovery` //! //! # Classes visible from Python: -//! - [`PartialDiscover`] is the Rust implementation of +//! - [`PartialDiscovery`] is the Rust implementation of //! `mercurial.setdiscovery.partialdiscovery`. use crate::PyRevision; use crate::{ - cindex::Index, conversion::rev_pyiter_collect, exceptions::GraphError, + conversion::rev_pyiter_collect, exceptions::GraphError, + revlog::PySharedIndex, }; use cpython::{ ObjectProtocol, PyClone, PyDict, PyModule, PyObject, PyResult, PyTuple, - Python, PythonObject, ToPyObject, + Python, PythonObject, ToPyObject, UnsafePyLeaked, }; use hg::discovery::PartialDiscovery as CorePartialDiscovery; use hg::Revision; @@ -26,11 +27,11 @@ use std::cell::RefCell; -use crate::revlog::pyindex_to_graph; +use crate::revlog::py_rust_index_to_graph; py_class!(pub class PartialDiscovery |py| { - data inner: RefCell<Box<CorePartialDiscovery<Index>>>; - data index: RefCell<Index>; + data inner: RefCell<UnsafePyLeaked<CorePartialDiscovery<PySharedIndex>>>; + data index: RefCell<UnsafePyLeaked<PySharedIndex>>; // `_respectsize` is currently only here to replicate the Python API and // will be used in future patches inside methods that are yet to be @@ -42,43 +43,124 @@ respectsize: bool, randomize: bool = true ) -> PyResult<PartialDiscovery> { - let index = repo.getattr(py, "changelog")?.getattr(py, "index")?; - let index = pyindex_to_graph(py, index)?; - let target_heads = rev_pyiter_collect(py, &targetheads, &index)?; - Self::create_instance( - py, - RefCell::new(Box::new(CorePartialDiscovery::new( - index.clone_ref(py), - target_heads, - respectsize, - randomize, - ))), - RefCell::new(index), - ) + Self::inner_new(py, repo, targetheads, respectsize, randomize) } def addcommons(&self, commons: PyObject) -> PyResult<PyObject> { - let index = self.index(py).borrow(); - let commons_vec: Vec<_> = rev_pyiter_collect(py, &commons, &*index)?; - let mut inner = self.inner(py).borrow_mut(); - inner.add_common_revisions(commons_vec) - .map_err(|e| GraphError::pynew(py, e))?; - Ok(py.None()) -} + self.inner_addcommons(py, commons) + } def addmissings(&self, missings: PyObject) -> PyResult<PyObject> { - let index = self.index(py).borrow(); - let missings_vec: Vec<_> = rev_pyiter_collect(py, &missings, &*index)?; - let mut inner = self.inner(py).borrow_mut(); - inner.add_missing_revisions(missings_vec) - .map_err(|e| GraphError::pynew(py, e))?; - Ok(py.None()) + self.inner_addmissings(py, missings) } def addinfo(&self, sample: PyObject) -> PyResult<PyObject> { + self.inner_addinfo(py, sample) + } + + def hasinfo(&self) -> PyResult<bool> { + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner = unsafe { leaked.try_borrow(py)? }; + Ok(inner.has_info()) + } + + def iscomplete(&self) -> PyResult<bool> { + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner = unsafe { leaked.try_borrow(py)? }; + Ok(inner.is_complete()) + } + + def stats(&self) -> PyResult<PyDict> { + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner = unsafe { leaked.try_borrow(py)? 
}; + let stats = inner.stats(); + let as_dict: PyDict = PyDict::new(py); + as_dict.set_item(py, "undecided", + stats.undecided.map( + |l| l.to_py_object(py).into_object()) + .unwrap_or_else(|| py.None()))?; + Ok(as_dict) + } + + def commonheads(&self) -> PyResult<HashSet<PyRevision>> { + let leaked = self.inner(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let inner = unsafe { leaked.try_borrow(py)? }; + let res = inner.common_heads() + .map_err(|e| GraphError::pynew(py, e))?; + Ok(res.into_iter().map(Into::into).collect()) + } + + def takefullsample(&self, headrevs: PyObject, + size: usize) -> PyResult<PyObject> { + self.inner_takefullsample(py, headrevs, size) + } + + def takequicksample(&self, headrevs: PyObject, + size: usize) -> PyResult<PyObject> { + self.inner_takequicksample(py, headrevs, size) + } + +}); + +impl PartialDiscovery { + fn inner_new( + py: Python, + repo: PyObject, + targetheads: PyObject, + respectsize: bool, + randomize: bool, + ) -> PyResult<Self> { + let index = repo.getattr(py, "changelog")?.getattr(py, "index")?; + let cloned_index = py_rust_index_to_graph(py, index.clone_ref(py))?; + let index = py_rust_index_to_graph(py, index)?; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let target_heads = { + let borrowed_idx = unsafe { index.try_borrow(py)? }; + rev_pyiter_collect(py, &targetheads, &*borrowed_idx)? + }; + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let lazy_disco = unsafe { + index.map(py, |idx| { + CorePartialDiscovery::new( + idx, + target_heads, + respectsize, + randomize, + ) + }) + }; + Self::create_instance( + py, + RefCell::new(lazy_disco), + RefCell::new(cloned_index), + ) + } + + /// Convert a Python iterator of revisions into a vector + fn pyiter_to_vec( + &self, + py: Python, + iter: &PyObject, + ) -> PyResult<Vec<Revision>> { + let leaked = self.index(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let index = unsafe { leaked.try_borrow(py)? }; + rev_pyiter_collect(py, iter, &*index) + } + + fn inner_addinfo( + &self, + py: Python, + sample: PyObject, + ) -> PyResult<PyObject> { let mut missing: Vec<Revision> = Vec::new(); let mut common: Vec<Revision> = Vec::new(); - for info in sample.iter(py)? { // info is a pair (Revision, bool) + for info in sample.iter(py)? { + // info is a pair (Revision, bool) let mut revknown = info?.iter(py)?; let rev: PyRevision = revknown.next().unwrap()?.extract(py)?; // This is fine since we're just using revisions as integers @@ -91,42 +173,59 @@ missing.push(rev); } } - let mut inner = self.inner(py).borrow_mut(); - inner.add_common_revisions(common) + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let mut inner = unsafe { leaked.try_borrow_mut(py)? }; + inner + .add_common_revisions(common) + .map_err(|e| GraphError::pynew(py, e))?; + inner + .add_missing_revisions(missing) .map_err(|e| GraphError::pynew(py, e))?; - inner.add_missing_revisions(missing) + Ok(py.None()) + } + + fn inner_addcommons( + &self, + py: Python, + commons: PyObject, + ) -> PyResult<PyObject> { + let commons_vec = self.pyiter_to_vec(py, &commons)?; + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let mut inner = unsafe { leaked.try_borrow_mut(py)? 
}; + inner + .add_common_revisions(commons_vec) .map_err(|e| GraphError::pynew(py, e))?; Ok(py.None()) } - def hasinfo(&self) -> PyResult<bool> { - Ok(self.inner(py).borrow().has_info()) - } - - def iscomplete(&self) -> PyResult<bool> { - Ok(self.inner(py).borrow().is_complete()) + fn inner_addmissings( + &self, + py: Python, + missings: PyObject, + ) -> PyResult<PyObject> { + let missings_vec = self.pyiter_to_vec(py, &missings)?; + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let mut inner = unsafe { leaked.try_borrow_mut(py)? }; + inner + .add_missing_revisions(missings_vec) + .map_err(|e| GraphError::pynew(py, e))?; + Ok(py.None()) } - def stats(&self) -> PyResult<PyDict> { - let stats = self.inner(py).borrow().stats(); - let as_dict: PyDict = PyDict::new(py); - as_dict.set_item(py, "undecided", - stats.undecided.map( - |l| l.to_py_object(py).into_object()) - .unwrap_or_else(|| py.None()))?; - Ok(as_dict) - } - - def commonheads(&self) -> PyResult<HashSet<PyRevision>> { - let res = self.inner(py).borrow().common_heads() - .map_err(|e| GraphError::pynew(py, e))?; - Ok(res.into_iter().map(Into::into).collect()) - } - - def takefullsample(&self, _headrevs: PyObject, - size: usize) -> PyResult<PyObject> { - let mut inner = self.inner(py).borrow_mut(); - let sample = inner.take_full_sample(size) + fn inner_takefullsample( + &self, + py: Python, + _headrevs: PyObject, + size: usize, + ) -> PyResult<PyObject> { + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let mut inner = unsafe { leaked.try_borrow_mut(py)? }; + let sample = inner + .take_full_sample(size) .map_err(|e| GraphError::pynew(py, e))?; let as_vec: Vec<PyObject> = sample .iter() @@ -135,12 +234,18 @@ Ok(PyTuple::new(py, as_vec.as_slice()).into_object()) } - def takequicksample(&self, headrevs: PyObject, - size: usize) -> PyResult<PyObject> { - let index = self.index(py).borrow(); - let mut inner = self.inner(py).borrow_mut(); - let revsvec: Vec<_> = rev_pyiter_collect(py, &headrevs, &*index)?; - let sample = inner.take_quick_sample(revsvec, size) + fn inner_takequicksample( + &self, + py: Python, + headrevs: PyObject, + size: usize, + ) -> PyResult<PyObject> { + let revsvec = self.pyiter_to_vec(py, &headrevs)?; + let mut leaked = self.inner(py).borrow_mut(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let mut inner = unsafe { leaked.try_borrow_mut(py)? }; + let sample = inner + .take_quick_sample(revsvec, size) .map_err(|e| GraphError::pynew(py, e))?; let as_vec: Vec<PyObject> = sample .iter() @@ -148,8 +253,7 @@ .collect(); Ok(PyTuple::new(py, as_vec.as_slice()).into_object()) } - -}); +} /// Create the module, with __package__ given from parent pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
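A mechanical pattern worth noting in this file: `py_class!` now declares only thin forwarders, while the logic moves to a regular `impl` block where borrows and `unsafe` blocks are easier to write and format. A minimal sketch of the pattern with hypothetical names (`Example`, `inner_total`):

    use cpython::{py_class, PyResult, Python};
    use std::cell::RefCell;

    py_class!(pub class Example |py| {
        data inner: RefCell<Vec<u32>>;

        // Thin forwarder: the macro body stays a one-liner.
        def total(&self) -> PyResult<u32> {
            self.inner_total(py)
        }
    });

    impl Example {
        // Real logic in a plain impl block (hypothetical method).
        fn inner_total(&self, py: Python) -> PyResult<u32> {
            Ok(self.inner(py).borrow().iter().sum())
        }
    }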
--- a/rust/hg-cpython/src/revlog.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/hg-cpython/src/revlog.rs Mon Feb 12 16:22:47 2024 +0100 @@ -6,43 +6,105 @@ // GNU General Public License version 2 or any later version. use crate::{ - cindex, + conversion::{rev_pyiter_collect, rev_pyiter_collect_or_else}, utils::{node_from_py_bytes, node_from_py_object}, PyRevision, }; use cpython::{ buffer::{Element, PyBuffer}, exc::{IndexError, ValueError}, - ObjectProtocol, PyBytes, PyClone, PyDict, PyErr, PyInt, PyModule, - PyObject, PyResult, PyString, PyTuple, Python, PythonObject, ToPyObject, + ObjectProtocol, PyBool, PyBytes, PyClone, PyDict, PyErr, PyInt, PyList, + PyModule, PyObject, PyResult, PySet, PyString, PyTuple, Python, + PythonObject, ToPyObject, UnsafePyLeaked, }; use hg::{ - nodemap::{Block, NodeMapError, NodeTree}, - revlog::{nodemap::NodeMap, NodePrefix, RevlogIndex}, - BaseRevision, Revision, UncheckedRevision, + errors::HgError, + index::{ + IndexHeader, Phase, RevisionDataParams, SnapshotsCache, + INDEX_ENTRY_SIZE, + }, + nodemap::{Block, NodeMapError, NodeTree as CoreNodeTree}, + revlog::{nodemap::NodeMap, Graph, NodePrefix, RevlogError, RevlogIndex}, + BaseRevision, Node, Revision, UncheckedRevision, NULL_REVISION, }; -use std::cell::RefCell; +use std::{cell::RefCell, collections::HashMap}; +use vcsgraph::graph::Graph as VCSGraph; + +pub struct PySharedIndex { + /// The underlying hg-core index + pub(crate) inner: &'static hg::index::Index, +} /// Return a Struct implementing the Graph trait -pub(crate) fn pyindex_to_graph( +pub(crate) fn py_rust_index_to_graph( py: Python, index: PyObject, -) -> PyResult<cindex::Index> { - match index.extract::<MixedIndex>(py) { - Ok(midx) => Ok(midx.clone_cindex(py)), - Err(_) => cindex::Index::new(py, index), +) -> PyResult<UnsafePyLeaked<PySharedIndex>> { + let midx = index.extract::<Index>(py)?; + let leaked = midx.index(py).leak_immutable(); + // Safety: we don't leak the "faked" reference out of the `UnsafePyLeaked` + Ok(unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) }) +} + +impl Clone for PySharedIndex { + fn clone(&self) -> Self { + Self { inner: self.inner } + } +} + +impl Graph for PySharedIndex { + #[inline(always)] + fn parents(&self, rev: Revision) -> Result<[Revision; 2], hg::GraphError> { + self.inner.parents(rev) } } -py_class!(pub class MixedIndex |py| { - data cindex: RefCell<cindex::Index>; - data nt: RefCell<Option<NodeTree>>; +impl VCSGraph for PySharedIndex { + #[inline(always)] + fn parents( + &self, + rev: BaseRevision, + ) -> Result<vcsgraph::graph::Parents, vcsgraph::graph::GraphReadError> + { + // FIXME This trait should be reworked to decide between Revision + // and UncheckedRevision, get better errors names, etc. 
+ match Graph::parents(self, Revision(rev)) { + Ok(parents) => { + Ok(vcsgraph::graph::Parents([parents[0].0, parents[1].0])) + } + Err(hg::GraphError::ParentOutOfRange(rev)) => { + Err(vcsgraph::graph::GraphReadError::KeyedInvalidKey(rev.0)) + } + } + } +} + +impl RevlogIndex for PySharedIndex { + fn len(&self) -> usize { + self.inner.len() + } + fn node(&self, rev: Revision) -> Option<&Node> { + self.inner.node(rev) + } +} + +py_class!(pub class Index |py| { + @shared data index: hg::index::Index; + data nt: RefCell<Option<CoreNodeTree>>; data docket: RefCell<Option<PyObject>>; // Holds a reference to the mmap'ed persistent nodemap data - data mmap: RefCell<Option<PyBuffer>>; + data nodemap_mmap: RefCell<Option<PyBuffer>>; + // Holds a reference to the mmap'ed persistent index data + data index_mmap: RefCell<Option<PyBuffer>>; + data head_revs_py_list: RefCell<Option<PyList>>; + data head_node_ids_py_list: RefCell<Option<PyList>>; - def __new__(_cls, cindex: PyObject) -> PyResult<MixedIndex> { - Self::new(py, cindex) + def __new__( + _cls, + data: PyObject, + default_header: u32, + ) -> PyResult<Self> { + Self::new(py, data, default_header) } /// Compatibility layer used for Python consumers needing access to the C index @@ -51,11 +113,11 @@ /// that may need to build a custom `nodetree`, based on a specified revset. /// With a Rust implementation of the nodemap, we will be able to get rid of /// this, by exposing our own standalone nodemap class, - /// ready to accept `MixedIndex`. - def get_cindex(&self) -> PyResult<PyObject> { + /// ready to accept `Index`. +/* def get_cindex(&self) -> PyResult<PyObject> { Ok(self.cindex(py).borrow().inner().clone_ref(py)) } - +*/ // Index API involving nodemap, as defined in mercurial/pure/parsers.py /// Return Revision if found, raises a bare `error.RevlogError` @@ -63,10 +125,12 @@ def get_rev(&self, node: PyBytes) -> PyResult<Option<PyRevision>> { let opt = self.get_nodetree(py)?.borrow(); let nt = opt.as_ref().unwrap(); - let idx = &*self.cindex(py).borrow(); + let ridx = &*self.index(py).borrow(); let node = node_from_py_bytes(py, &node)?; - let res = nt.find_bin(idx, node.into()); - Ok(res.map_err(|e| nodemap_error(py, e))?.map(Into::into)) + let rust_rev = + nt.find_bin(ridx, node.into()).map_err(|e| nodemap_error(py, e))?; + Ok(rust_rev.map(Into::into)) + } /// same as `get_rev()` but raises a bare `error.RevlogError` if node @@ -80,6 +144,9 @@ /// return True if the node exist in the index def has_node(&self, node: PyBytes) -> PyResult<bool> { + // TODO OPTIM we could avoid a needless conversion here, + // to do when scaffolding for pure Rust switch is removed, + // as `get_rev()` currently does the necessary assertions self.get_rev(py, node).map(|opt| opt.is_some()) } @@ -87,7 +154,7 @@ def shortest(&self, node: PyBytes) -> PyResult<usize> { let opt = self.get_nodetree(py)?.borrow(); let nt = opt.as_ref().unwrap(); - let idx = &*self.cindex(py).borrow(); + let idx = &*self.index(py).borrow(); match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) 
{ Ok(Some(l)) => Ok(l), @@ -99,7 +166,7 @@ def partialmatch(&self, node: PyObject) -> PyResult<Option<PyBytes>> { let opt = self.get_nodetree(py)?.borrow(); let nt = opt.as_ref().unwrap(); - let idx = &*self.cindex(py).borrow(); + let idx = &*self.index(py).borrow(); let node_as_string = if cfg!(feature = "python3-sys") { node.cast_as::<PyString>(py)?.to_string(py)?.to_string() @@ -132,21 +199,36 @@ let node_bytes = tup.get_item(py, 7).extract(py)?; let node = node_from_py_object(py, &node_bytes)?; - let mut idx = self.cindex(py).borrow_mut(); + let rev = self.len(py)? as BaseRevision; // This is ok since we will just add the revision to the index - let rev = Revision(idx.len() as BaseRevision); - idx.append(py, tup)?; - + let rev = Revision(rev); + self.index(py) + .borrow_mut() + .append(py_tuple_to_revision_data_params(py, tup)?) + .unwrap(); + let idx = &*self.index(py).borrow(); self.get_nodetree(py)?.borrow_mut().as_mut().unwrap() - .insert(&*idx, &node, rev) + .insert(idx, &node, rev) .map_err(|e| nodemap_error(py, e))?; Ok(py.None()) } def __delitem__(&self, key: PyObject) -> PyResult<()> { // __delitem__ is both for `del idx[r]` and `del idx[r1:r2]` - self.cindex(py).borrow().inner().del_item(py, key)?; + let start = if let Ok(rev) = key.extract(py) { + UncheckedRevision(rev) + } else { + let start = key.getattr(py, "start")?; + UncheckedRevision(start.extract(py)?) + }; + let start = self.index(py) + .borrow() + .check_revision(start) + .ok_or_else(|| { + nodemap_error(py, NodeMapError::RevisionNotInIndex(start)) + })?; + self.index(py).borrow_mut().remove(start).unwrap(); let mut opt = self.get_nodetree(py)?.borrow_mut(); let nt = opt.as_mut().unwrap(); nt.invalidate_all(); @@ -155,88 +237,171 @@ } // - // Reforwarded C index API + // Index methods previously reforwarded to C index (tp_methods) + // Same ordering as in revlog.c // - // index_methods (tp_methods). Same ordering as in revlog.c - /// return the gca set of the given revs - def ancestors(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "ancestors", args, kw) + def ancestors(&self, *args, **_kw) -> PyResult<PyObject> { + let rust_res = self.inner_ancestors(py, args)?; + Ok(rust_res) } /// return the heads of the common ancestors of the given revs - def commonancestorsheads(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "commonancestorsheads", args, kw) + def commonancestorsheads(&self, *args, **_kw) -> PyResult<PyObject> { + let rust_res = self.inner_commonancestorsheads(py, args)?; + Ok(rust_res) } /// Clear the index caches and inner py_class data. /// It is Python's responsibility to call `update_nodemap_data` again. 
- def clearcaches(&self, *args, **kw) -> PyResult<PyObject> { + def clearcaches(&self) -> PyResult<PyObject> { self.nt(py).borrow_mut().take(); self.docket(py).borrow_mut().take(); - self.mmap(py).borrow_mut().take(); - self.call_cindex(py, "clearcaches", args, kw) + self.nodemap_mmap(py).borrow_mut().take(); + self.head_revs_py_list(py).borrow_mut().take(); + self.head_node_ids_py_list(py).borrow_mut().take(); + self.index(py).borrow().clear_caches(); + Ok(py.None()) } /// return the raw binary string representing a revision - def entry_binary(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "entry_binary", args, kw) + def entry_binary(&self, *args, **_kw) -> PyResult<PyObject> { + let rindex = self.index(py).borrow(); + let rev = UncheckedRevision(args.get_item(py, 0).extract(py)?); + let rust_bytes = rindex.check_revision(rev).and_then( + |r| rindex.entry_binary(r)) + .ok_or_else(|| rev_not_in_index(py, rev))?; + let rust_res = PyBytes::new(py, rust_bytes).into_object(); + Ok(rust_res) } /// return a binary packed version of the header - def pack_header(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "pack_header", args, kw) - } - - /// get an index entry - def get(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "get", args, kw) + def pack_header(&self, *args, **_kw) -> PyResult<PyObject> { + let rindex = self.index(py).borrow(); + let packed = rindex.pack_header(args.get_item(py, 0).extract(py)?); + let rust_res = PyBytes::new(py, &packed).into_object(); + Ok(rust_res) } /// compute phases - def computephasesmapsets(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "computephasesmapsets", args, kw) + def computephasesmapsets(&self, *args, **_kw) -> PyResult<PyObject> { + let py_roots = args.get_item(py, 0).extract::<PyDict>(py)?; + let rust_res = self.inner_computephasesmapsets(py, py_roots)?; + Ok(rust_res) } /// reachableroots - def reachableroots2(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "reachableroots2", args, kw) + def reachableroots2(&self, *args, **_kw) -> PyResult<PyObject> { + let rust_res = self.inner_reachableroots2( + py, + UncheckedRevision(args.get_item(py, 0).extract(py)?), + args.get_item(py, 1), + args.get_item(py, 2), + args.get_item(py, 3).extract(py)?, + )?; + Ok(rust_res) } /// get head revisions - def headrevs(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "headrevs", args, kw) + def headrevs(&self) -> PyResult<PyObject> { + let rust_res = self.inner_headrevs(py)?; + Ok(rust_res) + } + + /// get head nodeids + def head_node_ids(&self) -> PyResult<PyObject> { + let rust_res = self.inner_head_node_ids(py)?; + Ok(rust_res) } /// get filtered head revisions - def headrevsfiltered(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "headrevsfiltered", args, kw) + def headrevsfiltered(&self, *args, **_kw) -> PyResult<PyObject> { + let rust_res = self.inner_headrevsfiltered(py, &args.get_item(py, 0))?; + Ok(rust_res) } /// True if the object is a snapshot - def issnapshot(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "issnapshot", args, kw) + def issnapshot(&self, *args, **_kw) -> PyResult<bool> { + let index = self.index(py).borrow(); + let result = index + .is_snapshot(UncheckedRevision(args.get_item(py, 0).extract(py)?)) + .map_err(|e| { + PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string()) + })?; + Ok(result) } /// Gather snapshot data in a cache dict - def findsnapshots(&self, *args, **kw) -> 
PyResult<PyObject> { - self.call_cindex(py, "findsnapshots", args, kw) + def findsnapshots(&self, *args, **_kw) -> PyResult<PyObject> { + let index = self.index(py).borrow(); + let cache: PyDict = args.get_item(py, 0).extract(py)?; + // this methods operates by setting new values in the cache, + // hence we will compare results by letting the C implementation + // operate over a deepcopy of the cache, and finally compare both + // caches. + let c_cache = PyDict::new(py); + for (k, v) in cache.items(py) { + c_cache.set_item(py, k, PySet::new(py, v)?)?; + } + + let start_rev = UncheckedRevision(args.get_item(py, 1).extract(py)?); + let end_rev = UncheckedRevision(args.get_item(py, 2).extract(py)?); + let mut cache_wrapper = PySnapshotsCache{ py, dict: cache }; + index.find_snapshots( + start_rev, + end_rev, + &mut cache_wrapper, + ).map_err(|_| revlog_error(py))?; + Ok(py.None()) } /// determine revisions with deltas to reconstruct fulltext - def deltachain(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "deltachain", args, kw) + def deltachain(&self, *args, **_kw) -> PyResult<PyObject> { + let index = self.index(py).borrow(); + let rev = args.get_item(py, 0).extract::<BaseRevision>(py)?.into(); + let stop_rev = + args.get_item(py, 1).extract::<Option<BaseRevision>>(py)?; + let rev = index.check_revision(rev).ok_or_else(|| { + nodemap_error(py, NodeMapError::RevisionNotInIndex(rev)) + })?; + let stop_rev = if let Some(stop_rev) = stop_rev { + let stop_rev = UncheckedRevision(stop_rev); + Some(index.check_revision(stop_rev).ok_or_else(|| { + nodemap_error(py, NodeMapError::RevisionNotInIndex(stop_rev)) + })?) + } else {None}; + let using_general_delta = args.get_item(py, 2) + .extract::<Option<u32>>(py)? + .map(|i| i != 0); + let (chain, stopped) = index.delta_chain( + rev, stop_rev, using_general_delta + ).map_err(|e| { + PyErr::new::<cpython::exc::ValueError, _>(py, e.to_string()) + })?; + + let chain: Vec<_> = chain.into_iter().map(|r| r.0).collect(); + Ok( + PyTuple::new( + py, + &[ + chain.into_py_object(py).into_object(), + stopped.into_py_object(py).into_object() + ] + ).into_object() + ) + } /// slice planned chunk read to reach a density threshold - def slicechunktodensity(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "slicechunktodensity", args, kw) - } - - /// stats for the index - def stats(&self, *args, **kw) -> PyResult<PyObject> { - self.call_cindex(py, "stats", args, kw) + def slicechunktodensity(&self, *args, **_kw) -> PyResult<PyObject> { + let rust_res = self.inner_slicechunktodensity( + py, + args.get_item(py, 0), + args.get_item(py, 1).extract(py)?, + args.get_item(py, 2).extract(py)? + )?; + Ok(rust_res) } // index_sequence_methods and index_mapping_methods. @@ -244,45 +409,29 @@ // Since we call back through the high level Python API, // there's no point making a distinction between index_get // and index_getitem. + // gracinet 2023: this above is no longer true for the pure Rust impl def __len__(&self) -> PyResult<usize> { - self.cindex(py).borrow().inner().len(py) + self.len(py) } def __getitem__(&self, key: PyObject) -> PyResult<PyObject> { - // this conversion seems needless, but that's actually because - // `index_getitem` does not handle conversion from PyLong, - // which expressions such as [e for e in index] internally use. 
- // Note that we don't seem to have a direct way to call - // PySequence_GetItem (does the job), which would possibly be better - // for performance - let key = match key.extract::<i32>(py) { - Ok(rev) => rev.to_py_object(py).into_object(), - Err(_) => key, - }; - self.cindex(py).borrow().inner().get_item(py, key) - } - - def __setitem__(&self, key: PyObject, value: PyObject) -> PyResult<()> { - self.cindex(py).borrow().inner().set_item(py, key, value) + let rust_res = self.inner_getitem(py, key.clone_ref(py))?; + Ok(rust_res) } def __contains__(&self, item: PyObject) -> PyResult<bool> { // ObjectProtocol does not seem to provide contains(), so // this is an equivalent implementation of the index_contains() // defined in revlog.c - let cindex = self.cindex(py).borrow(); match item.extract::<i32>(py) { Ok(rev) => { - Ok(rev >= -1 && rev < cindex.inner().len(py)? as BaseRevision) + Ok(rev >= -1 && rev < self.len(py)? as BaseRevision) } Err(_) => { - cindex.inner().call_method( - py, - "has_node", - PyTuple::new(py, &[item]), - None)? - .extract(py) + let item_bytes: PyBytes = item.extract(py)?; + let rust_res = self.has_node(py, item_bytes)?; + Ok(rust_res) } } } @@ -304,37 +453,204 @@ @property def entry_size(&self) -> PyResult<PyInt> { - self.cindex(py).borrow().inner().getattr(py, "entry_size")?.extract::<PyInt>(py) + let rust_res: PyInt = INDEX_ENTRY_SIZE.to_py_object(py); + Ok(rust_res) } @property def rust_ext_compat(&self) -> PyResult<PyInt> { - self.cindex(py).borrow().inner().getattr(py, "rust_ext_compat")?.extract::<PyInt>(py) + // will be entirely removed when the Rust index yet useful to + // implement in Rust to detangle things when removing `self.cindex` + let rust_res: PyInt = 1.to_py_object(py); + Ok(rust_res) + } + + @property + def is_rust(&self) -> PyResult<PyBool> { + Ok(false.to_py_object(py)) } }); -impl MixedIndex { - fn new(py: Python, cindex: PyObject) -> PyResult<MixedIndex> { +/// Take a (potentially) mmap'ed buffer, and return the underlying Python +/// buffer along with the Rust slice into said buffer. We need to keep the +/// Python buffer around, otherwise we'd get a dangling pointer once the buffer +/// is freed from Python's side. +/// +/// # Safety +/// +/// The caller must make sure that the buffer is kept around for at least as +/// long as the slice. +#[deny(unsafe_op_in_unsafe_fn)] +unsafe fn mmap_keeparound( + py: Python, + data: PyObject, +) -> PyResult<( + PyBuffer, + Box<dyn std::ops::Deref<Target = [u8]> + Send + Sync + 'static>, +)> { + let buf = PyBuffer::get(py, &data)?; + let len = buf.item_count(); + + // Build a slice from the mmap'ed buffer data + let cbuf = buf.buf_ptr(); + let bytes = if std::mem::size_of::<u8>() == buf.item_size() + && buf.is_c_contiguous() + && u8::is_compatible_format(buf.format()) + { + unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) } + } else { + return Err(PyErr::new::<ValueError, _>( + py, + "Nodemap data buffer has an invalid memory representation" + .to_string(), + )); + }; + + Ok((buf, Box::new(bytes))) +} + +fn py_tuple_to_revision_data_params( + py: Python, + tuple: PyTuple, +) -> PyResult<RevisionDataParams> { + if tuple.len(py) < 8 { + // this is better than the panic promised by tup.get_item() + return Err(PyErr::new::<IndexError, _>( + py, + "tuple index out of range", + )); + } + let offset_or_flags: u64 = tuple.get_item(py, 0).extract(py)?; + let node_id = tuple + .get_item(py, 7) + .extract::<PyBytes>(py)? 
+ .data(py) + .try_into() + .unwrap(); + let flags = (offset_or_flags & 0xFFFF) as u16; + let data_offset = offset_or_flags >> 16; + Ok(RevisionDataParams { + flags, + data_offset, + data_compressed_length: tuple.get_item(py, 1).extract(py)?, + data_uncompressed_length: tuple.get_item(py, 2).extract(py)?, + data_delta_base: tuple.get_item(py, 3).extract(py)?, + link_rev: tuple.get_item(py, 4).extract(py)?, + parent_rev_1: tuple.get_item(py, 5).extract(py)?, + parent_rev_2: tuple.get_item(py, 6).extract(py)?, + node_id, + ..Default::default() + }) +} +fn revision_data_params_to_py_tuple( + py: Python, + params: RevisionDataParams, +) -> PyTuple { + PyTuple::new( + py, + &[ + params.data_offset.into_py_object(py).into_object(), + params + .data_compressed_length + .into_py_object(py) + .into_object(), + params + .data_uncompressed_length + .into_py_object(py) + .into_object(), + params.data_delta_base.into_py_object(py).into_object(), + params.link_rev.into_py_object(py).into_object(), + params.parent_rev_1.into_py_object(py).into_object(), + params.parent_rev_2.into_py_object(py).into_object(), + PyBytes::new(py, ¶ms.node_id) + .into_py_object(py) + .into_object(), + params._sidedata_offset.into_py_object(py).into_object(), + params + ._sidedata_compressed_length + .into_py_object(py) + .into_object(), + params + .data_compression_mode + .into_py_object(py) + .into_object(), + params + ._sidedata_compression_mode + .into_py_object(py) + .into_object(), + params._rank.into_py_object(py).into_object(), + ], + ) +} + +struct PySnapshotsCache<'p> { + py: Python<'p>, + dict: PyDict, +} + +impl<'p> SnapshotsCache for PySnapshotsCache<'p> { + fn insert_for( + &mut self, + rev: BaseRevision, + value: BaseRevision, + ) -> Result<(), RevlogError> { + let pyvalue = value.into_py_object(self.py).into_object(); + match self.dict.get_item(self.py, rev) { + Some(obj) => obj + .extract::<PySet>(self.py) + .and_then(|set| set.add(self.py, pyvalue)), + None => PySet::new(self.py, vec![pyvalue]) + .and_then(|set| self.dict.set_item(self.py, rev, set)), + } + .map_err(|_| { + RevlogError::Other(HgError::unsupported( + "Error in Python caches handling", + )) + }) + } +} + +impl Index { + fn new(py: Python, data: PyObject, header: u32) -> PyResult<Self> { + // Safety: we keep the buffer around inside the class as `index_mmap` + let (buf, bytes) = unsafe { mmap_keeparound(py, data)? }; + Self::create_instance( py, - RefCell::new(cindex::Index::new(py, cindex)?), + hg::index::Index::new( + bytes, + IndexHeader::parse(&header.to_be_bytes()) + .expect("default header is broken") + .unwrap(), + ) + .map_err(|e| { + revlog_error_with_msg(py, e.to_string().as_bytes()) + })?, RefCell::new(None), RefCell::new(None), RefCell::new(None), + RefCell::new(Some(buf)), + RefCell::new(None), + RefCell::new(None), ) } + fn len(&self, py: Python) -> PyResult<usize> { + let rust_index_len = self.index(py).borrow().len(); + Ok(rust_index_len) + } + /// This is scaffolding at this point, but it could also become /// a way to start a persistent nodemap or perform a /// vacuum / repack operation fn fill_nodemap( &self, py: Python, - nt: &mut NodeTree, + nt: &mut CoreNodeTree, ) -> PyResult<PyObject> { - let index = self.cindex(py).borrow(); - for r in 0..index.len() { + let index = self.index(py).borrow(); + for r in 0..self.len(py)? 
{ let rev = Revision(r as BaseRevision); // in this case node() won't ever return None nt.insert(&*index, index.node(rev).unwrap(), rev) @@ -346,34 +662,16 @@ fn get_nodetree<'a>( &'a self, py: Python<'a>, - ) -> PyResult<&'a RefCell<Option<NodeTree>>> { + ) -> PyResult<&'a RefCell<Option<CoreNodeTree>>> { if self.nt(py).borrow().is_none() { let readonly = Box::<Vec<_>>::default(); - let mut nt = NodeTree::load_bytes(readonly, 0); + let mut nt = CoreNodeTree::load_bytes(readonly, 0); self.fill_nodemap(py, &mut nt)?; self.nt(py).borrow_mut().replace(nt); } Ok(self.nt(py)) } - /// forward a method call to the underlying C index - fn call_cindex( - &self, - py: Python, - name: &str, - args: &PyTuple, - kwargs: Option<&PyDict>, - ) -> PyResult<PyObject> { - self.cindex(py) - .borrow() - .inner() - .call_method(py, name, args, kwargs) - } - - pub fn clone_cindex(&self, py: Python) -> cindex::Index { - self.cindex(py).borrow().clone_ref(py) - } - /// Returns the full nodemap bytes to be written as-is to disk fn inner_nodemap_data_all(&self, py: Python) -> PyResult<PyBytes> { let nodemap = self.get_nodetree(py)?.borrow_mut().take().unwrap(); @@ -382,7 +680,7 @@ // If there's anything readonly, we need to build the data again from // scratch let bytes = if readonly.len() > 0 { - let mut nt = NodeTree::load_bytes(Box::<Vec<_>>::default(), 0); + let mut nt = CoreNodeTree::load_bytes(Box::<Vec<_>>::default(), 0); self.fill_nodemap(py, &mut nt)?; let (readonly, bytes) = nt.into_readonly_and_added_bytes(); @@ -427,36 +725,19 @@ docket: PyObject, nm_data: PyObject, ) -> PyResult<PyObject> { - let buf = PyBuffer::get(py, &nm_data)?; + // Safety: we keep the buffer around inside the class as `nodemap_mmap` + let (buf, bytes) = unsafe { mmap_keeparound(py, nm_data)? }; let len = buf.item_count(); + self.nodemap_mmap(py).borrow_mut().replace(buf); - // Build a slice from the mmap'ed buffer data - let cbuf = buf.buf_ptr(); - let bytes = if std::mem::size_of::<u8>() == buf.item_size() - && buf.is_c_contiguous() - && u8::is_compatible_format(buf.format()) - { - unsafe { std::slice::from_raw_parts(cbuf as *const u8, len) } - } else { - return Err(PyErr::new::<ValueError, _>( - py, - "Nodemap data buffer has an invalid memory representation" - .to_string(), - )); - }; - - // Keep a reference to the mmap'ed buffer, otherwise we get a dangling - // pointer. - self.mmap(py).borrow_mut().replace(buf); - - let mut nt = NodeTree::load_bytes(Box::new(bytes), len); + let mut nt = CoreNodeTree::load_bytes(bytes, len); let data_tip = docket .getattr(py, "tip_rev")? .extract::<BaseRevision>(py)? 
.into(); self.docket(py).borrow_mut().replace(docket.clone_ref(py)); - let idx = self.cindex(py).borrow(); + let idx = self.index(py).borrow(); let data_tip = idx.check_revision(data_tip).ok_or_else(|| { nodemap_error(py, NodeMapError::RevisionNotInIndex(data_tip)) })?; @@ -473,8 +754,392 @@ Ok(py.None()) } + + fn inner_getitem(&self, py: Python, key: PyObject) -> PyResult<PyObject> { + let idx = self.index(py).borrow(); + Ok(match key.extract::<BaseRevision>(py) { + Ok(key_as_int) => { + let entry_params = if key_as_int == NULL_REVISION.0 { + RevisionDataParams::default() + } else { + let rev = UncheckedRevision(key_as_int); + match idx.entry_as_params(rev) { + Some(e) => e, + None => { + return Err(PyErr::new::<IndexError, _>( + py, + "revlog index out of range", + )); + } + } + }; + revision_data_params_to_py_tuple(py, entry_params) + .into_object() + } + _ => self.get_rev(py, key.extract::<PyBytes>(py)?)?.map_or_else( + || py.None(), + |py_rev| py_rev.into_py_object(py).into_object(), + ), + }) + } + + fn inner_head_node_ids(&self, py: Python) -> PyResult<PyObject> { + let index = &*self.index(py).borrow(); + + // We don't use the shortcut here, as it's actually slower to loop + // through the cached `PyList` than to re-do the whole computation for + // large lists, which are the performance sensitive ones anyway. + let head_revs = index.head_revs().map_err(|e| graph_error(py, e))?; + let res: Vec<_> = head_revs + .iter() + .map(|r| { + PyBytes::new( + py, + index + .node(*r) + .expect("rev should have been in the index") + .as_bytes(), + ) + .into_object() + }) + .collect(); + + self.cache_new_heads_py_list(&head_revs, py); + self.cache_new_heads_node_ids_py_list(&head_revs, py); + + Ok(PyList::new(py, &res).into_object()) + } + + fn inner_headrevs(&self, py: Python) -> PyResult<PyObject> { + let index = &*self.index(py).borrow(); + if let Some(new_heads) = + index.head_revs_shortcut().map_err(|e| graph_error(py, e))? + { + self.cache_new_heads_py_list(&new_heads, py); + } + + Ok(self + .head_revs_py_list(py) + .borrow() + .as_ref() + .expect("head revs should be cached") + .clone_ref(py) + .into_object()) + } + + fn inner_headrevsfiltered( + &self, + py: Python, + filtered_revs: &PyObject, + ) -> PyResult<PyObject> { + let index = &mut *self.index(py).borrow_mut(); + let filtered_revs = rev_pyiter_collect(py, filtered_revs, index)?; + + if let Some(new_heads) = index + .head_revs_filtered(&filtered_revs, true) + .map_err(|e| graph_error(py, e))? 
+ { + self.cache_new_heads_py_list(&new_heads, py); + } + + Ok(self + .head_revs_py_list(py) + .borrow() + .as_ref() + .expect("head revs should be cached") + .clone_ref(py) + .into_object()) + } + + fn cache_new_heads_node_ids_py_list( + &self, + new_heads: &[Revision], + py: Python<'_>, + ) -> PyList { + let index = self.index(py).borrow(); + let as_vec: Vec<PyObject> = new_heads + .iter() + .map(|r| { + PyBytes::new( + py, + index + .node(*r) + .expect("rev should have been in the index") + .as_bytes(), + ) + .into_object() + }) + .collect(); + let new_heads_py_list = PyList::new(py, &as_vec); + *self.head_node_ids_py_list(py).borrow_mut() = + Some(new_heads_py_list.clone_ref(py)); + new_heads_py_list + } + + fn cache_new_heads_py_list( + &self, + new_heads: &[Revision], + py: Python<'_>, + ) -> PyList { + let as_vec: Vec<PyObject> = new_heads + .iter() + .map(|r| PyRevision::from(*r).into_py_object(py).into_object()) + .collect(); + let new_heads_py_list = PyList::new(py, &as_vec); + *self.head_revs_py_list(py).borrow_mut() = + Some(new_heads_py_list.clone_ref(py)); + new_heads_py_list + } + + fn inner_ancestors( + &self, + py: Python, + py_revs: &PyTuple, + ) -> PyResult<PyObject> { + let index = &*self.index(py).borrow(); + let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?; + let as_vec: Vec<_> = index + .ancestors(&revs) + .map_err(|e| graph_error(py, e))? + .iter() + .map(|r| PyRevision::from(*r).into_py_object(py).into_object()) + .collect(); + Ok(PyList::new(py, &as_vec).into_object()) + } + + fn inner_commonancestorsheads( + &self, + py: Python, + py_revs: &PyTuple, + ) -> PyResult<PyObject> { + let index = &*self.index(py).borrow(); + let revs: Vec<_> = rev_pyiter_collect(py, py_revs.as_object(), index)?; + let as_vec: Vec<_> = index + .common_ancestor_heads(&revs) + .map_err(|e| graph_error(py, e))? + .iter() + .map(|r| PyRevision::from(*r).into_py_object(py).into_object()) + .collect(); + Ok(PyList::new(py, &as_vec).into_object()) + } + + fn inner_computephasesmapsets( + &self, + py: Python, + py_roots: PyDict, + ) -> PyResult<PyObject> { + let index = &*self.index(py).borrow(); + let opt = self.get_nodetree(py)?.borrow(); + let nt = opt.as_ref().unwrap(); + let roots: Result<HashMap<Phase, Vec<Revision>>, PyErr> = py_roots + .items_list(py) + .iter(py) + .map(|r| { + let phase = r.get_item(py, 0)?; + let nodes = r.get_item(py, 1)?; + // Transform the nodes from Python to revs here since we + // have access to the nodemap + let revs: Result<_, _> = nodes + .iter(py)? + .map(|node| match node?.extract::<PyBytes>(py) { + Ok(py_bytes) => { + let node = node_from_py_bytes(py, &py_bytes)?; + nt.find_bin(index, node.into()) + .map_err(|e| nodemap_error(py, e))? + .ok_or_else(|| revlog_error(py)) + } + Err(e) => Err(e), + }) + .collect(); + let phase = Phase::try_from(phase.extract::<usize>(py)?) + .map_err(|_| revlog_error(py)); + Ok((phase?, revs?)) + }) + .collect(); + let (len, phase_maps) = index + .compute_phases_map_sets(roots?) + .map_err(|e| graph_error(py, e))?; + + // Ugly hack, but temporary + const IDX_TO_PHASE_NUM: [usize; 4] = [1, 2, 32, 96]; + let py_phase_maps = PyDict::new(py); + for (idx, roots) in phase_maps.iter().enumerate() { + let phase_num = IDX_TO_PHASE_NUM[idx].into_py_object(py); + // OPTIM too bad we have to collect here. 
At least, we could + // reuse the same Vec and allocate it with capacity at + // max(len(phase_maps) + let roots_vec: Vec<PyInt> = roots + .iter() + .map(|r| PyRevision::from(*r).into_py_object(py)) + .collect(); + py_phase_maps.set_item( + py, + phase_num, + PySet::new(py, roots_vec)?, + )?; + } + Ok(PyTuple::new( + py, + &[ + len.into_py_object(py).into_object(), + py_phase_maps.into_object(), + ], + ) + .into_object()) + } + + fn inner_slicechunktodensity( + &self, + py: Python, + revs: PyObject, + target_density: f64, + min_gap_size: usize, + ) -> PyResult<PyObject> { + let index = &*self.index(py).borrow(); + let revs: Vec<_> = rev_pyiter_collect(py, &revs, index)?; + let as_nested_vec = + index.slice_chunk_to_density(&revs, target_density, min_gap_size); + let mut res = Vec::with_capacity(as_nested_vec.len()); + let mut py_chunk = Vec::new(); + for chunk in as_nested_vec { + py_chunk.clear(); + py_chunk.reserve_exact(chunk.len()); + for rev in chunk { + py_chunk.push( + PyRevision::from(rev).into_py_object(py).into_object(), + ); + } + res.push(PyList::new(py, &py_chunk).into_object()); + } + // This is just to do the same as C, not sure why it does this + if res.len() == 1 { + Ok(PyTuple::new(py, &res).into_object()) + } else { + Ok(PyList::new(py, &res).into_object()) + } + } + + fn inner_reachableroots2( + &self, + py: Python, + min_root: UncheckedRevision, + heads: PyObject, + roots: PyObject, + include_path: bool, + ) -> PyResult<PyObject> { + let index = &*self.index(py).borrow(); + let heads = rev_pyiter_collect_or_else(py, &heads, index, |_rev| { + PyErr::new::<IndexError, _>(py, "head out of range") + })?; + let roots: Result<_, _> = roots + .iter(py)? + .map(|r| { + r.and_then(|o| match o.extract::<PyRevision>(py) { + Ok(r) => Ok(UncheckedRevision(r.0)), + Err(e) => Err(e), + }) + }) + .collect(); + let as_set = index + .reachable_roots(min_root, heads, roots?, include_path) + .map_err(|e| graph_error(py, e))?; + let as_vec: Vec<PyObject> = as_set + .iter() + .map(|r| PyRevision::from(*r).into_py_object(py).into_object()) + .collect(); + Ok(PyList::new(py, &as_vec).into_object()) + } } +py_class!(pub class NodeTree |py| { + data nt: RefCell<CoreNodeTree>; + data index: RefCell<UnsafePyLeaked<PySharedIndex>>; + + def __new__(_cls, index: PyObject) -> PyResult<NodeTree> { + let index = py_rust_index_to_graph(py, index)?; + let nt = CoreNodeTree::default(); // in-RAM, fully mutable + Self::create_instance(py, RefCell::new(nt), RefCell::new(index)) + } + + /// Tell whether the NodeTree is still valid + /// + /// In case of mutation of the index, the given results are not + /// guaranteed to be correct, and in fact, the methods borrowing + /// the inner index would fail because of `PySharedRef` poisoning + /// (generation-based guard), same as iterating on a `dict` that has + /// been meanwhile mutated. + def is_invalidated(&self) -> PyResult<bool> { + let leaked = self.index(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let result = unsafe { leaked.try_borrow(py) }; + // two cases for result to be an error: + // - the index has previously been mutably borrowed + // - there is currently a mutable borrow + // in both cases this amounts for previous results related to + // the index to still be valid. 
+ Ok(result.is_err()) + } + + def insert(&self, rev: PyRevision) -> PyResult<PyObject> { + let leaked = self.index(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let index = &*unsafe { leaked.try_borrow(py)? }; + + let rev = UncheckedRevision(rev.0); + let rev = index + .check_revision(rev) + .ok_or_else(|| rev_not_in_index(py, rev))?; + if rev == NULL_REVISION { + return Err(rev_not_in_index(py, rev.into())) + } + + let entry = index.inner.get_entry(rev).unwrap(); + let mut nt = self.nt(py).borrow_mut(); + nt.insert(index, entry.hash(), rev).map_err(|e| nodemap_error(py, e))?; + + Ok(py.None()) + } + + /// Lookup by node hex prefix in the NodeTree, returning revision number. + /// + /// This is not part of the classical NodeTree API, but is good enough + /// for unit testing, as in `test-rust-revlog.py`. + def prefix_rev_lookup( + &self, + node_prefix: PyBytes + ) -> PyResult<Option<PyRevision>> { + let prefix = NodePrefix::from_hex(node_prefix.data(py)) + .map_err(|_| PyErr::new::<ValueError, _>( + py, + format!("Invalid node or prefix {:?}", + node_prefix.as_object())) + )?; + + let nt = self.nt(py).borrow(); + let leaked = self.index(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let index = &*unsafe { leaked.try_borrow(py)? }; + + Ok(nt.find_bin(index, prefix) + .map_err(|e| nodemap_error(py, e))? + .map(|r| r.into()) + ) + } + + def shortest(&self, node: PyBytes) -> PyResult<usize> { + let nt = self.nt(py).borrow(); + let leaked = self.index(py).borrow(); + // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked` + let idx = &*unsafe { leaked.try_borrow(py)? }; + match nt.unique_prefix_len_node(idx, &node_from_py_bytes(py, &node)?) + { + Ok(Some(l)) => Ok(l), + Ok(None) => Err(revlog_error(py)), + Err(e) => Err(nodemap_error(py, e)), + } + } +}); + fn revlog_error(py: Python) -> PyErr { match py .import("mercurial.error") @@ -488,7 +1153,28 @@ } } -fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr { +fn revlog_error_with_msg(py: Python, msg: &[u8]) -> PyErr { + match py + .import("mercurial.error") + .and_then(|m| m.get(py, "RevlogError")) + { + Err(e) => e, + Ok(cls) => PyErr::from_instance( + py, + cls.call(py, (PyBytes::new(py, msg),), None) + .ok() + .into_py_object(py), + ), + } +} + +fn graph_error(py: Python, _err: hg::GraphError) -> PyErr { + // ParentOutOfRange is currently the only alternative + // in `hg::GraphError`. The C index always raises this simple ValueError. + PyErr::new::<ValueError, _>(py, "parent out of range") +} + +fn nodemap_rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr { PyErr::new::<ValueError, _>( py, format!( @@ -499,11 +1185,18 @@ ) } +fn rev_not_in_index(py: Python, rev: UncheckedRevision) -> PyErr { + PyErr::new::<ValueError, _>( + py, + format!("revlog index out of range: {}", rev), + ) +} + /// Standard treatment of NodeMapError fn nodemap_error(py: Python, err: NodeMapError) -> PyErr { match err { NodeMapError::MultipleResults => revlog_error(py), - NodeMapError::RevisionNotInIndex(r) => rev_not_in_index(py, r), + NodeMapError::RevisionNotInIndex(r) => nodemap_rev_not_in_index(py, r), } } @@ -514,7 +1207,8 @@ m.add(py, "__package__", package)?; m.add(py, "__doc__", "RevLog - Rust implementations")?; - m.add_class::<MixedIndex>(py)?; + m.add_class::<Index>(py)?; + m.add_class::<NodeTree>(py)?; let sys = PyModule::import(py, "sys")?; let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
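The enabler for all of the above is the `@shared data index` declaration: rust-cpython exposes the field through a `PySharedRef` whose `leak_immutable()` returns the `UnsafePyLeaked` handle used throughout, and a later mutable borrow of the index invalidates outstanding handles, which is exactly what `NodeTree.is_invalidated` probes. Condensed from `py_rust_index_to_graph` above:

    // `midx` is an instance of the `Index` py_class defined in this file.
    let leaked: UnsafePyLeaked<&'static hg::index::Index> =
        midx.index(py).leak_immutable();
    // Safety: we don't leak the "faked" reference out of `UnsafePyLeaked`
    let shared = unsafe { leaked.map(py, |idx| PySharedIndex { inner: idx }) };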
--- a/rust/rhg/src/commands/status.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/rhg/src/commands/status.rs Mon Feb 12 16:22:47 2024 +0100 @@ -28,14 +28,17 @@ get_bytes_from_os_str, get_bytes_from_os_string, get_path_from_bytes, }; use hg::utils::hg_path::{hg_path_to_path_buf, HgPath}; -use hg::DirstateStatus; use hg::PatternFileWarning; +use hg::Revision; use hg::StatusError; use hg::StatusOptions; use hg::{self, narrow, sparse}; +use hg::{DirstateStatus, RevlogOpenOptions}; use log::info; use rayon::prelude::*; +use std::borrow::Cow; use std::io; +use std::mem::take; use std::path::PathBuf; pub const HELP_TEXT: &str = " @@ -140,6 +143,38 @@ .action(clap::ArgAction::SetTrue) .long("verbose"), ) + .arg( + Arg::new("rev") + .help("show difference from/to revision") + .long("rev") + .num_args(1) + .action(clap::ArgAction::Append) + .value_name("REV"), + ) +} + +fn parse_revpair( + repo: &Repo, + revs: Option<Vec<String>>, +) -> Result<Option<(Revision, Revision)>, CommandError> { + let revs = match revs { + None => return Ok(None), + Some(revs) => revs, + }; + if revs.is_empty() { + return Ok(None); + } + if revs.len() != 2 { + return Err(CommandError::unsupported("expected 0 or 2 --rev flags")); + } + + let rev1 = &revs[0]; + let rev2 = &revs[1]; + let rev1 = hg::revset::resolve_single(rev1, repo) + .map_err(|e| (e, rev1.as_str()))?; + let rev2 = hg::revset::resolve_single(rev2, repo) + .map_err(|e| (e, rev2.as_str()))?; + Ok(Some((rev1, rev2))) } /// Pure data type allowing the caller to specify file states to display @@ -229,6 +264,7 @@ let config = invocation.config; let args = invocation.subcommand_args; + let revs = args.get_many::<String>("rev"); let print0 = args.get_flag("print0"); let verbose = args.get_flag("verbose") || config.get_bool(b"ui", b"verbose")? @@ -262,6 +298,7 @@ || config.get_bool(b"ui", b"statuscopies")?; let repo = invocation.repo?; + let revpair = parse_revpair(repo, revs.map(|i| i.cloned().collect()))?; if verbose && has_unfinished_state(repo)? { return Err(CommandError::unsupported( @@ -285,13 +322,37 @@ type StatusResult<'a> = Result<(DirstateStatus<'a>, Vec<PatternFileWarning>), StatusError>; + let relative_status = config + .get_option(b"commands", b"status.relative")? + .expect("commands.status.relative should have a default value"); + + let relativize_paths = relative_status || { + // See in Python code with `getuipathfn` usage in `commands.py`. + let legacy_relative_behavior = args.contains_id("file"); + match relative_paths(invocation.config)? { + RelativePaths::Legacy => legacy_relative_behavior, + RelativePaths::Bool(v) => v, + } + }; + + let mut output = DisplayStatusPaths { + ui, + no_status, + relativize: if relativize_paths { + Some(RelativizePaths::new(repo)?) 
+ } else { + None + }, + print0, + }; + let after_status = |res: StatusResult| -> Result<_, CommandError> { let (mut ds_status, pattern_warnings) = res?; for warning in pattern_warnings { ui.write_stderr(&format_pattern_file_warning(&warning, repo))?; } - for (path, error) in ds_status.bad { + for (path, error) in take(&mut ds_status.bad) { let error = match error { hg::BadMatch::OsError(code) => { std::io::Error::from_raw_os_error(code).to_string() @@ -322,8 +383,8 @@ })?; let working_directory_vfs = repo.working_directory_vfs(); let store_vfs = repo.store_vfs(); - let res: Vec<_> = ds_status - .unsure + let revlog_open_options = repo.default_revlog_options(false)?; + let res: Vec<_> = take(&mut ds_status.unsure) .into_par_iter() .map(|to_check| { // The compiler seems to get a bit confused with complex @@ -336,6 +397,7 @@ check_exec, &manifest, &to_check.path, + revlog_open_options, ) { Err(HgError::IoError { .. }) => { // IO errors most likely stem from the file being @@ -370,55 +432,12 @@ } } - let relative_status = config - .get_option(b"commands", b"status.relative")? - .expect("commands.status.relative should have a default value"); - - let relativize_paths = relative_status || { - // See in Python code with `getuipathfn` usage in `commands.py`. - let legacy_relative_behavior = args.contains_id("file"); - match relative_paths(invocation.config)? { - RelativePaths::Legacy => legacy_relative_behavior, - RelativePaths::Bool(v) => v, - } - }; - - let output = DisplayStatusPaths { - ui, - no_status, - relativize: if relativize_paths { - Some(RelativizePaths::new(repo)?) - } else { - None - }, - print0, - }; - if display_states.modified { - output.display(b"M ", "status.modified", ds_status.modified)?; - } - if display_states.added { - output.display(b"A ", "status.added", ds_status.added)?; - } - if display_states.removed { - output.display(b"R ", "status.removed", ds_status.removed)?; - } - if display_states.deleted { - output.display(b"! ", "status.deleted", ds_status.deleted)?; - } - if display_states.unknown { - output.display(b"? 
", "status.unknown", ds_status.unknown)?; - } - if display_states.ignored { - output.display(b"I ", "status.ignored", ds_status.ignored)?; - } - if display_states.clean { - output.display(b"C ", "status.clean", ds_status.clean)?; - } - let dirstate_write_needed = ds_status.dirty; let filesystem_time_at_status_start = ds_status.filesystem_time_at_status_start; + output.output(display_states, ds_status)?; + Ok(( fixup, dirstate_write_needed, @@ -426,6 +445,54 @@ )) }; let (narrow_matcher, narrow_warnings) = narrow::matcher(repo)?; + + if let Some((rev1, rev2)) = revpair { + let mut ds_status = DirstateStatus::default(); + if list_copies { + return Err(CommandError::unsupported( + "status --rev --rev with copy information is not implemented yet", + )); + } + + let stat = hg::operations::status_rev_rev_no_copies( + repo, + rev1, + rev2, + narrow_matcher, + )?; + for entry in stat.iter() { + let (path, status) = entry?; + let path = StatusPath { + path: Cow::Borrowed(path), + copy_source: None, + }; + match status { + hg::operations::DiffStatus::Removed => { + if display_states.removed { + ds_status.removed.push(path) + } + } + hg::operations::DiffStatus::Added => { + if display_states.added { + ds_status.added.push(path) + } + } + hg::operations::DiffStatus::Modified => { + if display_states.modified { + ds_status.modified.push(path) + } + } + hg::operations::DiffStatus::Matching => { + if display_states.clean { + ds_status.clean.push(path) + } + } + } + } + output.output(display_states, ds_status)?; + return Ok(()); + } + let (sparse_matcher, sparse_warnings) = sparse::matcher(repo)?; let matcher = match (repo.has_narrow(), repo.has_sparse()) { (true, true) => { @@ -628,6 +695,35 @@ } Ok(()) } + + fn output( + &mut self, + display_states: DisplayStates, + ds_status: DirstateStatus, + ) -> Result<(), CommandError> { + if display_states.modified { + self.display(b"M ", "status.modified", ds_status.modified)?; + } + if display_states.added { + self.display(b"A ", "status.added", ds_status.added)?; + } + if display_states.removed { + self.display(b"R ", "status.removed", ds_status.removed)?; + } + if display_states.deleted { + self.display(b"! ", "status.deleted", ds_status.deleted)?; + } + if display_states.unknown { + self.display(b"? ", "status.unknown", ds_status.unknown)?; + } + if display_states.ignored { + self.display(b"I ", "status.ignored", ds_status.ignored)?; + } + if display_states.clean { + self.display(b"C ", "status.clean", ds_status.clean)?; + } + Ok(()) + } } /// Outcome of the additional check for an ambiguous tracked file @@ -650,6 +746,7 @@ check_exec: bool, manifest: &Manifest, hg_path: &HgPath, + revlog_open_options: RevlogOpenOptions, ) -> Result<UnsureOutcome, HgError> { let vfs = working_directory_vfs; let fs_path = hg_path_to_path_buf(hg_path).expect("HgPath conversion"); @@ -681,7 +778,11 @@ if entry_flags != fs_flags { return Ok(UnsureOutcome::Modified); } - let filelog = hg::filelog::Filelog::open_vfs(&store_vfs, hg_path)?; + let filelog = hg::filelog::Filelog::open_vfs( + &store_vfs, + hg_path, + revlog_open_options, + )?; let fs_len = fs_metadata.len(); let file_node = entry.node_id()?; let filelog_entry = filelog.entry_for_node(file_node).map_err(|_| {
--- a/rust/rhg/src/main.rs Mon Feb 12 16:17:08 2024 +0100 +++ b/rust/rhg/src/main.rs Mon Feb 12 16:22:47 2024 +0100 @@ -524,13 +524,20 @@ std::process::exit(exit_code(&result, use_detailed_exit_code)) } +mod commands { + pub mod cat; + pub mod config; + pub mod debugdata; + pub mod debugignorerhg; + pub mod debugrequirements; + pub mod debugrhgsparse; + pub mod files; + pub mod root; + pub mod status; +} + macro_rules! subcommands { ($( $command: ident )+) => { - mod commands { - $( - pub mod $command; - )+ - } fn add_subcommand_args(app: clap::Command) -> clap::Command { app
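
Moving the `pub mod` declarations out of the `subcommands!` macro into a plain `mod commands` block means the macro body now only generates the per-command plumbing. A minimal, self-contained sketch of that shape (toy modules and a string-keyed dispatcher, not rhg's real clap-based dispatch):

    mod commands {
        pub mod root {
            pub fn run() -> &'static str { "root" }
        }
        pub mod status {
            pub fn run() -> &'static str { "status" }
        }
    }

    // The macro no longer declares the modules; it only expands the list of
    // command names into repetitive glue code.
    macro_rules! subcommands {
        ($( $command:ident )+) => {
            fn dispatch(name: &str) -> Option<&'static str> {
                match name {
                    $( stringify!($command) => Some(commands::$command::run()), )+
                    _ => None,
                }
            }
        };
    }

    subcommands! { root status }

    fn main() {
        assert_eq!(dispatch("status"), Some("status"));
        assert_eq!(dispatch("rebase"), None);
    }
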
--- a/setup.py Mon Feb 12 16:17:08 2024 +0100 +++ b/setup.py Mon Feb 12 16:22:47 2024 +0100 @@ -221,6 +221,9 @@ self.cmd = cmd self.env = env + def __repr__(self): + return f"<hgcommand cmd={self.cmd} env={self.env}>" + def run(self, args): cmd = self.cmd + args returncode, out, err = runcmd(cmd, self.env) @@ -295,9 +298,15 @@ if attempt(hgcmd + check_cmd, hgenv): return hgcommand(hgcmd, hgenv) - # Fall back to trying the local hg installation. + # Fall back to trying the local hg installation (pure python) + repo_hg = os.path.join(os.path.dirname(__file__), 'hg') hgenv = localhgenv() - hgcmd = [sys.executable, 'hg'] + hgcmd = [sys.executable, repo_hg] + if attempt(hgcmd + check_cmd, hgenv): + return hgcommand(hgcmd, hgenv) + # Fall back to trying the local hg installation (whatever we can) + hgenv = localhgenv(pure_python=False) + hgcmd = [sys.executable, repo_hg] if attempt(hgcmd + check_cmd, hgenv): return hgcommand(hgcmd, hgenv) @@ -319,17 +328,18 @@ return None -def localhgenv(): +def localhgenv(pure_python=True): """Get an environment dictionary to use for invoking or importing mercurial from the local repository.""" # Execute hg out of this directory with a custom environment which takes # care to not use any hgrc files and do no localization. env = { - 'HGMODULEPOLICY': 'py', 'HGRCPATH': '', 'LANGUAGE': 'C', 'PATH': '', } # make pypi modules that use os.environ['PATH'] happy + if pure_python: + env['HGMODULEPOLICY'] = 'py' if 'LD_LIBRARY_PATH' in os.environ: env['LD_LIBRARY_PATH'] = os.environ['LD_LIBRARY_PATH'] if 'SystemRoot' in os.environ: @@ -1821,5 +1831,5 @@ 'welcome': 'contrib/packaging/macosx/Welcome.html', }, }, - **extra + **extra, )
--- a/tests/common-pattern.py Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/common-pattern.py Mon Feb 12 16:22:47 2024 +0100 @@ -127,6 +127,10 @@ br'\b(\d+) total queries in \d.\d\d\d\ds\b', lambda m: (br'%s total queries in *.????s (glob)' % m.group(1)), ), + ( + br'\belapsed time: \d+.\d{6} seconds\b', + br'elapsed time: *.?????? seconds (glob)', + ), ] # Various platform error strings, keyed on a common replacement string
--- a/tests/test-bundle.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-bundle.t Mon Feb 12 16:22:47 2024 +0100 @@ -286,13 +286,17 @@ #if reporevlogstore rust $ hg -R test debugcreatestreamclonebundle packed.hg - writing 2665 bytes for 6 files + writing 2665 bytes for 6 files (no-rust !) + writing 2919 bytes for 9 files (rust !) bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog $ f -B 64 --size --sha1 --hexdump packed.hg - packed.hg: size=2865, sha1=353d10311f4befa195d9a1ca4b8e26518115c702 - 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| - 0010: 00 00 00 00 0a 69 00 3b 67 65 6e 65 72 61 6c 64 |.....i.;generald| + packed.hg: size=2865, sha1=353d10311f4befa195d9a1ca4b8e26518115c702 (no-rust !) + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| (no-rust !) + 0010: 00 00 00 00 0a 69 00 3b 67 65 6e 65 72 61 6c 64 |.....i.;generald| (no-rust !) + packed.hg: size=3181, sha1=b202787710a1c109246554be589506cd2916acb7 (rust !) + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 09 00 00 |HGS1UN..........| (rust !) + 0010: 00 00 00 00 0b 67 00 3b 67 65 6e 65 72 61 6c 64 |.....g.;generald| (rust !) 0020: 65 6c 74 61 2c 72 65 76 6c 6f 67 2d 63 6f 6d 70 |elta,revlog-comp| 0030: 72 65 73 73 69 6f 6e 2d 7a 73 74 64 2c 72 65 76 |ression-zstd,rev| $ hg debugbundle --spec packed.hg @@ -302,12 +306,12 @@ #if reporevlogstore no-rust zstd $ hg -R test debugcreatestreamclonebundle packed.hg - writing 2665 bytes for 6 files + writing 2665 bytes for 7 files bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog $ f -B 64 --size --sha1 --hexdump packed.hg - packed.hg: size=2865, sha1=353d10311f4befa195d9a1ca4b8e26518115c702 - 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| + packed.hg: size=2882, sha1=6525b07e6bfced4b6c2319cb58c6ff76ca72fa13 + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 07 00 00 |HGS1UN..........| 0010: 00 00 00 00 0a 69 00 3b 67 65 6e 65 72 61 6c 64 |.....i.;generald| 0020: 65 6c 74 61 2c 72 65 76 6c 6f 67 2d 63 6f 6d 70 |elta,revlog-comp| 0030: 72 65 73 73 69 6f 6e 2d 7a 73 74 64 2c 72 65 76 |ression-zstd,rev| @@ -318,12 +322,12 @@ #if reporevlogstore no-rust no-zstd $ hg -R test debugcreatestreamclonebundle packed.hg - writing 2664 bytes for 6 files + writing 2664 bytes for 7 files bundle requirements: generaldelta, revlogv1, sparserevlog $ f -B 64 --size --sha1 --hexdump packed.hg - packed.hg: size=2840, sha1=12bf3eee3eb8a04c503ce2d29b48f0135c7edff5 - 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| + packed.hg: size=2857, sha1=3a7353323915b095baa6f2ee0a5aed588f11f5f0 + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 07 00 00 |HGS1UN..........| 0010: 00 00 00 00 0a 68 00 23 67 65 6e 65 72 61 6c 64 |.....h.#generald| 0020: 65 6c 74 61 2c 72 65 76 6c 6f 67 76 31 2c 73 70 |elta,revlogv1,sp| 0030: 61 72 73 65 72 65 76 6c 6f 67 00 64 61 74 61 2f |arserevlog.data/| @@ -346,13 +350,17 @@ #if reporevlogstore rust $ hg -R testnongd debugcreatestreamclonebundle packednongd.hg - writing 301 bytes for 3 files + writing 301 bytes for 3 files (no-rust !) + writing 427 bytes for 6 files (rust !) 
bundle requirements: revlog-compression-zstd, revlogv1 $ f -B 64 --size --sha1 --hexdump packednongd.hg - packednongd.hg: size=407, sha1=0b8714422b785ba8eb98c916b41ffd5fb994c9b5 - 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 03 00 00 |HGS1UN..........| - 0010: 00 00 00 00 01 2d 00 21 72 65 76 6c 6f 67 2d 63 |.....-.!revlog-c| + packednongd.hg: size=407, sha1=0b8714422b785ba8eb98c916b41ffd5fb994c9b5 (no-rust !) + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 03 00 00 |HGS1UN..........| (no-rust !) + 0010: 00 00 00 00 01 2d 00 21 72 65 76 6c 6f 67 2d 63 |.....-.!revlog-c| (no-rust !) + packednongd.hg: size=593, sha1=1ad0cbea11b5dd7b0437e54ae20fc5f8df118521 (rust !) + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 06 00 00 |HGS1UN..........| (rust !) + 0010: 00 00 00 00 01 ab 00 21 72 65 76 6c 6f 67 2d 63 |.......!revlog-c| (rust !) 0020: 6f 6d 70 72 65 73 73 69 6f 6e 2d 7a 73 74 64 2c |ompression-zstd,| 0030: 72 65 76 6c 6f 67 76 31 00 64 61 74 61 2f 66 6f |revlogv1.data/fo| @@ -364,12 +372,12 @@ #if reporevlogstore no-rust zstd $ hg -R testnongd debugcreatestreamclonebundle packednongd.hg - writing 301 bytes for 3 files + writing 301 bytes for 4 files bundle requirements: revlog-compression-zstd, revlogv1 $ f -B 64 --size --sha1 --hexdump packednongd.hg - packednongd.hg: size=407, sha1=0b8714422b785ba8eb98c916b41ffd5fb994c9b5 - 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 03 00 00 |HGS1UN..........| + packednongd.hg: size=423, sha1=4269c89cf64b6a4377be75a3983771c4153362bf + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 04 00 00 |HGS1UN..........| 0010: 00 00 00 00 01 2d 00 21 72 65 76 6c 6f 67 2d 63 |.....-.!revlog-c| 0020: 6f 6d 70 72 65 73 73 69 6f 6e 2d 7a 73 74 64 2c |ompression-zstd,| 0030: 72 65 76 6c 6f 67 76 31 00 64 61 74 61 2f 66 6f |revlogv1.data/fo| @@ -383,12 +391,12 @@ #if reporevlogstore no-rust no-zstd $ hg -R testnongd debugcreatestreamclonebundle packednongd.hg - writing 301 bytes for 3 files + writing 301 bytes for 4 files bundle requirements: revlogv1 $ f -B 64 --size --sha1 --hexdump packednongd.hg - packednongd.hg: size=383, sha1=1d9c230238edd5d38907100b729ba72b1831fe6f - 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 03 00 00 |HGS1UN..........| + packednongd.hg: size=399, sha1=99bb89decfc6674a3cf2cc87accc8c5332ede7fd + 0000: 48 47 53 31 55 4e 00 00 00 00 00 00 00 04 00 00 |HGS1UN..........| 0010: 00 00 00 00 01 2d 00 09 72 65 76 6c 6f 67 76 31 |.....-..revlogv1| 0020: 00 64 61 74 61 2f 66 6f 6f 2e 69 00 36 34 0a 00 |.data/foo.i.64..| 0030: 01 00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 |................| @@ -416,7 +424,8 @@ $ hg -R testsecret debugcreatestreamclonebundle packedsecret.hg (warning: stream clone bundle will contain secret revisions) - writing 301 bytes for 3 files + writing 301 bytes for 3 files (no-rust !) + writing 427 bytes for 6 files (rust !) 
bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog #endif @@ -425,7 +434,7 @@ $ hg -R testsecret debugcreatestreamclonebundle packedsecret.hg (warning: stream clone bundle will contain secret revisions) - writing 301 bytes for 3 files + writing 301 bytes for 4 files bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog #endif @@ -434,7 +443,7 @@ $ hg -R testsecret debugcreatestreamclonebundle packedsecret.hg (warning: stream clone bundle will contain secret revisions) - writing 301 bytes for 3 files + writing 301 bytes for 4 files bundle requirements: generaldelta, revlogv1, sparserevlog #endif @@ -479,10 +488,12 @@ > EOF $ hg -R packed debugapplystreamclonebundle packed.hg - 6 files to transfer, 2.60 KB of data + 7 files to transfer, 2.60 KB of data (no-rust !) + 9 files to transfer, 2.85 KB of data (rust !) pretxnopen: 000000000000 pretxnclose: aa35859c02ea - transferred 2.60 KB in * seconds (* */sec) (glob) + transferred 2.60 KB in * seconds (* */sec) (glob) (no-rust !) + transferred 2.85 KB in * seconds (* */sec) (glob) (rust !) txnclose: aa35859c02ea (for safety, confirm visibility of streamclone-ed changes by another @@ -1051,8 +1062,8 @@ DBG-DELTAS: CHANGELOG: rev=2: delta-base=2 is-cached=1 - search-rounds=0 try-count=0 - delta-type=full snap-depth=0 - p1-chain-length=0 p2-chain-length=-1 - duration=* (glob) adding manifests DBG-DELTAS: MANIFESTLOG: rev=0: delta-base=0 is-cached=1 - search-rounds=0 try-count=0 - delta-type=full snap-depth=0 - p1-chain-length=-1 p2-chain-length=-1 - duration=* (glob) - DBG-DELTAS: MANIFESTLOG: rev=1: delta-base=0 is-cached=1 - search-rounds=1 try-count=1 - delta-type=delta snap-depth=0 - p1-chain-length=0 p2-chain-length=-1 - duration=* (glob) - DBG-DELTAS: MANIFESTLOG: rev=2: delta-base=1 is-cached=1 - search-rounds=1 try-count=1 - delta-type=delta snap-depth=0 - p1-chain-length=1 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: MANIFESTLOG: rev=1: delta-base=0 is-cached=1 - search-rounds=1 try-count=1 - delta-type=delta snap-depth=-1 - p1-chain-length=0 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: MANIFESTLOG: rev=2: delta-base=1 is-cached=1 - search-rounds=1 try-count=1 - delta-type=delta snap-depth=-1 - p1-chain-length=1 p2-chain-length=-1 - duration=* (glob) adding file changes DBG-DELTAS: FILELOG:a: rev=0: delta-base=0 is-cached=1 - search-rounds=0 try-count=0 - delta-type=full snap-depth=0 - p1-chain-length=-1 p2-chain-length=-1 - duration=* (glob) DBG-DELTAS: FILELOG:b: rev=0: delta-base=0 is-cached=1 - search-rounds=0 try-count=0 - delta-type=full snap-depth=0 - p1-chain-length=-1 p2-chain-length=-1 - duration=* (glob)
--- a/tests/test-bundle2-exchange.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-bundle2-exchange.t Mon Feb 12 16:22:47 2024 +0100 @@ -1042,6 +1042,8 @@ adding changesets remote: abort: incompatible Mercurial client; bundle2 required remote: (see https://www.mercurial-scm.org/wiki/IncompatibleClient) + transaction abort! + rollback completed abort: stream ended unexpectedly (got 0 bytes, expected 4) [255]
--- a/tests/test-censor.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-censor.t	Mon Feb 12 16:22:47 2024 +0100
@@ -78,6 +78,9 @@
   $ mkdir -p foo/bar/baz
   $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C2 -t "remove password" ../../../target
+  checking for the censored content in 2 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions
   $ hg cat -r $H1 target | head -n 10
   Tainted file is now sanitized
   $ hg cat -r $H2 target | head -n 10
@@ -96,6 +99,9 @@
 (this also tests file pattern matching: with 'path:' scheme)
   $ hg --config extensions.censor= --cwd foo/bar/baz censor -r $C1 path:target
+  checking for the censored content in 2 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions
   $ hg cat -r $H1 target | head -n 10
   Tainted file is now sanitized
   $ hg cat -r $H2 target | head -n 10
@@ -235,6 +241,9 @@
   $ hg ci -m 'advance head H1' target
   $ H1=`hg id --debug -i`
   $ hg --config extensions.censor= censor -r $C3 target
+  checking for the censored content in 2 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions
   $ hg update -r $H2
   1 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ hg merge -r $C3
@@ -247,6 +256,7 @@
   $ hg update -C -r $H2
   1 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ hg --config extensions.censor= censor -r $H2 target
+  checking for the censored content in 2 heads
   abort: cannot censor file in heads (78a8fc215e79)
   (clean/delete and commit first)
   [255]
@@ -254,6 +264,7 @@
   $ hg ci -m 'bystander commit'
   $ H2=`hg id --debug -i`
   $ hg --config extensions.censor= censor -r "$H2^" target
+  checking for the censored content in 2 heads
   abort: cannot censor file in heads (efbe78065929)
   (clean/delete and commit first)
   [255]
@@ -266,6 +277,8 @@
   $ hg update -r "$H2^"
   1 files updated, 0 files merged, 0 files removed, 0 files unresolved
   $ hg --config extensions.censor= censor -r . target
+  checking for the censored content in 2 heads
+  checking for the censored content in the working directory
   abort: cannot censor working directory
   (clean/delete/update first)
   [255]
@@ -279,6 +292,9 @@
   $ hg ci -m 'delete target so it may be censored'
   $ H2=`hg id --debug -i`
   $ hg --config extensions.censor= censor -r $C4 target
+  checking for the censored content in 2 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions
   $ hg cat -r $C4 target | head -n 10
   $ hg cat -r "$H2^^" target | head -n 10
   Tainted file now super sanitized
@@ -311,6 +327,9 @@
   rev-count   data-size inl type      target
        10      ?????? no file      target (glob)
   $ hg --config extensions.censor= censor -r $C5 target
+  checking for the censored content in 2 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions

The important part is for the censor operation to not crash and the repository
to not be corrupted. Right now this involves keeping the revlog split.
@@ -389,6 +408,9 @@
   $ hg cat -r $REV target | head -n 10
   Passwords: hunter2hunter2
   $ hg --config extensions.censor= censor -r $REV target
+  checking for the censored content in 3 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions
   $ hg cat -r $REV target | head -n 10
   $ hg cat -r $CLEANREV target | head -n 10
   Re-sanitized; nothing to see here
@@ -489,6 +511,9 @@
   $ hg init ../rinit
   $ hg --config extensions.censor= censor -r 0 target
+  checking for the censored content in 3 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions
   $ hg bundle -r 0 --base null ../rinit/initbundle
   1 changesets found
   $ cd ../rinit
@@ -501,6 +526,22 @@
   (run 'hg update' to get a working copy)
   $ hg cat -r 0 target | head -n 10
+Can skip the head checking steps
+
+  $ hg --config extensions.censor= censor -r 0 --no-check-heads target
+  checking for the censored content in the working directory
+  censoring 1 file revisions
+
+Can censor multiple revisions in one go.
+
+  $ cd ../r
+  $ hg --config extensions.censor= censor -r 0+1 target
+  checking for the censored content in 3 heads
+  checking for the censored content in the working directory
+  censoring 2 file revisions
+
+
 #if revlogv2
 Testing feature that does not work in revlog v1
@@ -539,6 +580,9 @@
   $ hg cat -r $B1 target | wc -l
   *50002 (re)
   $ hg --config extensions.censor= censor -r $B1 target
+  checking for the censored content in 1 heads
+  checking for the censored content in the working directory
+  censoring 1 file revisions
   $ hg cat -r $B1 target | wc -l
   *0 (re)
--- a/tests/test-censor2.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-censor2.t Mon Feb 12 16:22:47 2024 +0100 @@ -15,6 +15,9 @@ $ echo erased-secret > target $ hg commit -m "erased secret" $ hg censor target --config extensions.censor= -r ".^^" + checking for the censored content in 1 heads + checking for the censored content in the working directory + censoring 1 file revisions $ hg update ".^" 1 files updated, 0 files merged, 0 files removed, 0 files unresolved $ cat target
--- a/tests/test-clone-stream-revlog-split.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-clone-stream-revlog-split.t Mon Feb 12 16:22:47 2024 +0100 @@ -102,21 +102,28 @@ bundle2-input-part: "stream2" (params: 3 mandatory) supported (stream-bundle2-v2 !) bundle2-input-part: "stream3-exp" (params: 1 mandatory) supported (stream-bundle2-v3 !) applying stream bundle - 7 files to transfer, 2.11 KB of data (stream-bundle2-v2 !) + 8 files to transfer, 2.11 KB of data (stream-bundle2-v2 no-rust !) + 10 files to transfer, 2.29 KB of data (stream-bundle2-v2 rust !) adding [s] data/some-file.i (1.23 KB) (stream-bundle2-v2 !) 7 entries to transfer (stream-bundle2-v3 !) adding [s] data/some-file.d (1.04 KB) (stream-bundle2-v3 !) adding [s] data/some-file.i (192 bytes) (stream-bundle2-v3 !) adding [s] phaseroots (43 bytes) adding [s] 00manifest.i (348 bytes) - adding [s] 00changelog.i (381 bytes) + adding [s] 00changelog.n (62 bytes) (rust !) + adding [s] 00changelog-88698448.nd (128 bytes) (rust !) + adding [s] 00changelog.d (189 bytes) + adding [s] 00changelog.i (192 bytes) adding [c] branch2-served (94 bytes) adding [c] rbc-names-v1 (7 bytes) adding [c] rbc-revs-v1 (24 bytes) updating the branch cache - transferred 2.11 KB in * seconds (* */sec) (glob) - bundle2-input-part: total payload size 2268 (stream-bundle2-v2 !) - bundle2-input-part: total payload size 2296 (stream-bundle2-v3 !) + transferred 2.11 KB in * seconds (* */sec) (glob) (no-rust !) + transferred 2.29 KB in * seconds (* */sec) (glob) (rust !) + bundle2-input-part: total payload size 2285 (stream-bundle2-v2 no-rust !) + bundle2-input-part: total payload size 2518 (stream-bundle2-v2 rust !) + bundle2-input-part: total payload size 2313 (stream-bundle2-v3 no-rust !) + bundle2-input-part: total payload size 2546 (stream-bundle2-v3 rust !) bundle2-input-part: "listkeys" (params: 1 mandatory) supported bundle2-input-bundle: 2 parts total checking for updated bookmarks
--- a/tests/test-clone-stream.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-clone-stream.t Mon Feb 12 16:22:47 2024 +0100 @@ -6,6 +6,10 @@ $ cat << EOF >> $HGRCPATH > [server] > bundle2.stream = no + > [format] + > # persistent nodemap is too broken with legacy format, + > # however client with nodemap support will have better stream support. + > use-persistent-nodemap=no > EOF #endif #if stream-bundle2-v3 @@ -328,9 +332,9 @@ #if stream-legacy $ hg clone --stream -U http://localhost:$HGPORT clone1 streaming all changes - 1090 files to transfer, 102 KB of data (no-zstd !) + 1091 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1090 files to transfer, 98.8 KB of data (zstd !) + 1091 files to transfer, 98.8 KB of data (zstd !) transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !) searching for changes no changes found @@ -339,10 +343,12 @@ #if stream-bundle2-v2 $ hg clone --stream -U http://localhost:$HGPORT clone1 streaming all changes - 1093 files to transfer, 102 KB of data (no-zstd !) + 1094 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1093 files to transfer, 98.9 KB of data (zstd !) - transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) + 1094 files to transfer, 98.9 KB of data (zstd no-rust !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + 1096 files to transfer, 99.0 KB of data (zstd rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) $ ls -1 clone1/.hg/cache branch2-base @@ -362,7 +368,8 @@ streaming all changes 1093 entries to transfer transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) $ ls -1 clone1/.hg/cache branch2-base @@ -387,12 +394,12 @@ #if no-zstd no-rust $ f --size --hex --bytes 256 body - body: size=119123 + body: size=119140 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......| 0010: 62 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 |b.STREAM2.......| 0020: 06 09 04 0c 26 62 79 74 65 63 6f 75 6e 74 31 30 |....&bytecount10| 0030: 34 31 31 35 66 69 6c 65 63 6f 75 6e 74 31 30 39 |4115filecount109| - 0040: 33 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |3requirementsgen| + 0040: 34 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |4requirementsgen| 0050: 65 72 61 6c 64 65 6c 74 61 25 32 43 72 65 76 6c |eraldelta%2Crevl| 0060: 6f 67 76 31 25 32 43 73 70 61 72 73 65 72 65 76 |ogv1%2Csparserev| 0070: 6c 6f 67 00 00 80 00 73 08 42 64 61 74 61 2f 30 |log....s.Bdata/0| @@ -407,14 +414,14 @@ #endif #if zstd no-rust $ f --size --hex --bytes 256 body - body: size=116310 (no-bigendian !) - body: size=116305 (bigendian !) + body: size=116327 (no-bigendian !) + body: size=116322 (bigendian !) 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......| 0010: 7c 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 ||.STREAM2.......| 0020: 06 09 04 0c 40 62 79 74 65 63 6f 75 6e 74 31 30 |....@bytecount10| 0030: 31 32 37 36 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1276filecount109| (no-bigendian !) 0030: 31 32 37 31 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1271filecount109| (bigendian !) 
- 0040: 33 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |3requirementsgen| + 0040: 34 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |4requirementsgen| 0050: 65 72 61 6c 64 65 6c 74 61 25 32 43 72 65 76 6c |eraldelta%2Crevl| 0060: 6f 67 2d 63 6f 6d 70 72 65 73 73 69 6f 6e 2d 7a |og-compression-z| 0070: 73 74 64 25 32 43 72 65 76 6c 6f 67 76 31 25 32 |std%2Crevlogv1%2| @@ -429,12 +436,22 @@ #endif #if zstd rust no-dirstate-v2 $ f --size --hex --bytes 256 body - body: size=116310 + body: size=116310 (no-rust !) + body: size=116495 (rust no-stream-legacy no-bigendian !) + body: size=116490 (rust no-stream-legacy bigendian !) + body: size=116327 (rust stream-legacy no-bigendian !) + body: size=116322 (rust stream-legacy bigendian !) 0000: 04 6e 6f 6e 65 48 47 32 30 00 00 00 00 00 00 00 |.noneHG20.......| 0010: 7c 07 53 54 52 45 41 4d 32 00 00 00 00 03 00 09 ||.STREAM2.......| 0020: 06 09 04 0c 40 62 79 74 65 63 6f 75 6e 74 31 30 |....@bytecount10| - 0030: 31 32 37 36 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1276filecount109| - 0040: 33 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |3requirementsgen| + 0030: 31 32 37 36 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1276filecount109| (no-rust !) + 0040: 33 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |3requirementsgen| (no-rust !) + 0030: 31 34 30 32 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1402filecount109| (rust no-stream-legacy no-bigendian !) + 0030: 31 33 39 37 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1397filecount109| (rust no-stream-legacy bigendian !) + 0040: 36 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |6requirementsgen| (rust no-stream-legacy !) + 0030: 31 32 37 36 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1276filecount109| (rust stream-legacy no-bigendian !) + 0030: 31 32 37 31 66 69 6c 65 63 6f 75 6e 74 31 30 39 |1271filecount109| (rust stream-legacy bigendian !) + 0040: 34 72 65 71 75 69 72 65 6d 65 6e 74 73 67 65 6e |4requirementsgen| (rust stream-legacy !) 0050: 65 72 61 6c 64 65 6c 74 61 25 32 43 72 65 76 6c |eraldelta%2Crevl| 0060: 6f 67 2d 63 6f 6d 70 72 65 73 73 69 6f 6e 2d 7a |og-compression-z| 0070: 73 74 64 25 32 43 72 65 76 6c 6f 67 76 31 25 32 |std%2Crevlogv1%2| @@ -473,9 +490,9 @@ #if stream-legacy $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed streaming all changes - 1090 files to transfer, 102 KB of data (no-zstd !) + 1091 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1090 files to transfer, 98.8 KB of data (zstd !) + 1091 files to transfer, 98.8 KB of data (zstd !) transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !) searching for changes no changes found @@ -483,17 +500,20 @@ #if stream-bundle2-v2 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed streaming all changes - 1093 files to transfer, 102 KB of data (no-zstd !) + 1094 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1093 files to transfer, 98.9 KB of data (zstd !) - transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) + 1094 files to transfer, 98.9 KB of data (zstd no-rust !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + 1096 files to transfer, 99.0 KB of data (zstd rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) #endif #if stream-bundle2-v3 $ hg clone --uncompressed -U http://localhost:$HGPORT clone1-uncompressed streaming all changes 1093 entries to transfer transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) 
- transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) #endif Clone with background file closing enabled @@ -505,8 +525,8 @@ sending branchmap command streaming all changes sending stream_out command - 1090 files to transfer, 102 KB of data (no-zstd !) - 1090 files to transfer, 98.8 KB of data (zstd !) + 1091 files to transfer, 102 KB of data (no-zstd !) + 1091 files to transfer, 98.8 KB of data (zstd !) starting 4 threads for background file closing updating the branch cache transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) @@ -537,16 +557,20 @@ bundle2-input-bundle: with-transaction bundle2-input-part: "stream2" (params: 3 mandatory) supported applying stream bundle - 1093 files to transfer, 102 KB of data (no-zstd !) - 1093 files to transfer, 98.9 KB of data (zstd !) + 1094 files to transfer, 102 KB of data (no-zstd !) + 1094 files to transfer, 98.9 KB of data (zstd no-rust !) + 1096 files to transfer, 99.0 KB of data (zstd rust !) starting 4 threads for background file closing starting 4 threads for background file closing updating the branch cache transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - bundle2-input-part: total payload size 118984 (no-zstd !) - transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) - bundle2-input-part: total payload size 116145 (zstd no-bigendian !) - bundle2-input-part: total payload size 116140 (zstd bigendian !) + bundle2-input-part: total payload size 119001 (no-zstd !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) + bundle2-input-part: total payload size 116162 (zstd no-bigendian no-rust !) + bundle2-input-part: total payload size 116330 (zstd no-bigendian rust !) + bundle2-input-part: total payload size 116157 (zstd bigendian no-rust !) + bundle2-input-part: total payload size 116325 (zstd bigendian rust !) bundle2-input-part: "listkeys" (params: 1 mandatory) supported bundle2-input-bundle: 2 parts total checking for updated bookmarks @@ -569,10 +593,13 @@ starting 4 threads for background file closing updating the branch cache transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - bundle2-input-part: total payload size 120079 (no-zstd !) - transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) - bundle2-input-part: total payload size 117240 (zstd no-bigendian !) - bundle2-input-part: total payload size 116138 (zstd bigendian !) + bundle2-input-part: total payload size 120096 (no-zstd !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) + bundle2-input-part: total payload size 117257 (zstd no-rust no-bigendian !) + bundle2-input-part: total payload size 117425 (zstd rust no-bigendian !) + bundle2-input-part: total payload size 117252 (zstd bigendian no-rust !) + bundle2-input-part: total payload size 117420 (zstd bigendian rust !) bundle2-input-part: "listkeys" (params: 1 mandatory) supported bundle2-input-bundle: 2 parts total checking for updated bookmarks @@ -604,9 +631,9 @@ #if stream-legacy $ hg clone --stream -U http://localhost:$HGPORT secret-allowed streaming all changes - 1090 files to transfer, 102 KB of data (no-zstd !) + 1091 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1090 files to transfer, 98.8 KB of data (zstd !) 
+ 1091 files to transfer, 98.8 KB of data (zstd !) transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !) searching for changes no changes found @@ -614,17 +641,20 @@ #if stream-bundle2-v2 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed streaming all changes - 1093 files to transfer, 102 KB of data (no-zstd !) + 1094 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1093 files to transfer, 98.9 KB of data (zstd !) - transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) + 1094 files to transfer, 98.9 KB of data (zstd no-rust !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + 1096 files to transfer, 99.0 KB of data (zstd rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) #endif #if stream-bundle2-v3 $ hg clone --stream -U http://localhost:$HGPORT secret-allowed streaming all changes 1093 entries to transfer transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - transferred 98.9 KB in * seconds (* */sec) (glob) (zstd !) + transferred 98.9 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.0 KB in * seconds (* */sec) (glob) (zstd rust !) #endif $ killdaemons.py @@ -729,9 +759,9 @@ #if stream-legacy $ hg clone --stream http://localhost:$HGPORT with-bookmarks streaming all changes - 1090 files to transfer, 102 KB of data (no-zstd !) + 1091 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1090 files to transfer, 98.8 KB of data (zstd !) + 1091 files to transfer, 98.8 KB of data (zstd !) transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !) searching for changes no changes found @@ -741,10 +771,12 @@ #if stream-bundle2-v2 $ hg clone --stream http://localhost:$HGPORT with-bookmarks streaming all changes - 1096 files to transfer, 102 KB of data (no-zstd !) + 1097 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1096 files to transfer, 99.1 KB of data (zstd !) - transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !) + 1097 files to transfer, 99.1 KB of data (zstd no-rust !) + transferred 99.1 KB in * seconds (* */sec) (glob) (zstd no-rust !) + 1099 files to transfer, 99.2 KB of data (zstd rust !) + transferred 99.2 KB in * seconds (* */sec) (glob) (zstd rust !) updating to branch default 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved #endif @@ -753,7 +785,8 @@ streaming all changes 1096 entries to transfer transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !) + transferred 99.1 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.2 KB in * seconds (* */sec) (glob) (zstd rust !) updating to branch default 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved #endif @@ -774,9 +807,9 @@ #if stream-legacy $ hg clone --stream http://localhost:$HGPORT phase-publish streaming all changes - 1090 files to transfer, 102 KB of data (no-zstd !) + 1091 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1090 files to transfer, 98.8 KB of data (zstd !) + 1091 files to transfer, 98.8 KB of data (zstd !) transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !) 
searching for changes no changes found @@ -786,10 +819,12 @@ #if stream-bundle2-v2 $ hg clone --stream http://localhost:$HGPORT phase-publish streaming all changes - 1096 files to transfer, 102 KB of data (no-zstd !) + 1097 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1096 files to transfer, 99.1 KB of data (zstd !) - transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !) + 1097 files to transfer, 99.1 KB of data (zstd no-rust !) + transferred 99.1 KB in * seconds (* */sec) (glob) (zstd no-rust !) + 1099 files to transfer, 99.2 KB of data (zstd rust !) + transferred 99.2 KB in * seconds (* */sec) (glob) (zstd rust !) updating to branch default 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved #endif @@ -798,7 +833,8 @@ streaming all changes 1096 entries to transfer transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !) + transferred 99.1 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.2 KB in * seconds (* */sec) (glob) (zstd rust !) updating to branch default 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved #endif @@ -825,9 +861,9 @@ $ hg clone --stream http://localhost:$HGPORT phase-no-publish streaming all changes - 1090 files to transfer, 102 KB of data (no-zstd !) + 1091 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1090 files to transfer, 98.8 KB of data (zstd !) + 1091 files to transfer, 98.8 KB of data (zstd !) transferred 98.8 KB in * seconds (* */sec) (glob) (zstd !) searching for changes no changes found @@ -841,10 +877,12 @@ #if stream-bundle2-v2 $ hg clone --stream http://localhost:$HGPORT phase-no-publish streaming all changes - 1097 files to transfer, 102 KB of data (no-zstd !) + 1098 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1097 files to transfer, 99.1 KB of data (zstd !) - transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !) + 1098 files to transfer, 99.1 KB of data (zstd no-rust !) + transferred 99.1 KB in * seconds (* */sec) (glob) (zstd no-rust !) + 1100 files to transfer, 99.2 KB of data (zstd rust !) + transferred 99.2 KB in * seconds (* */sec) (glob) (zstd rust !) updating to branch default 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved $ hg -R phase-no-publish phase -r 'all()' @@ -857,7 +895,8 @@ streaming all changes 1097 entries to transfer transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - transferred 99.1 KB in * seconds (* */sec) (glob) (zstd !) + transferred 99.1 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.2 KB in * seconds (* */sec) (glob) (zstd rust !) updating to branch default 1088 files updated, 0 files merged, 0 files removed, 0 files unresolved $ hg -R phase-no-publish phase -r 'all()' @@ -904,10 +943,12 @@ $ hg clone -U --stream http://localhost:$HGPORT with-obsolescence streaming all changes - 1098 files to transfer, 102 KB of data (no-zstd !) + 1099 files to transfer, 102 KB of data (no-zstd !) transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - 1098 files to transfer, 99.5 KB of data (zstd !) - transferred 99.5 KB in * seconds (* */sec) (glob) (zstd !) + 1099 files to transfer, 99.5 KB of data (zstd no-rust !) + transferred 99.5 KB in * seconds (* */sec) (glob) (zstd no-rust !) + 1101 files to transfer, 99.6 KB of data (zstd rust !) 
+ transferred 99.6 KB in * seconds (* */sec) (glob) (zstd rust !) $ hg -R with-obsolescence log -T '{rev}: {phase}\n' 2: draft 1: draft @@ -956,7 +997,8 @@ streaming all changes 1098 entries to transfer transferred 102 KB in * seconds (* */sec) (glob) (no-zstd !) - transferred 99.5 KB in * seconds (* */sec) (glob) (zstd !) + transferred 99.5 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 99.6 KB in * seconds (* */sec) (glob) (zstd rust !) $ hg -R with-obsolescence log -T '{rev}: {phase}\n' 2: draft 1: draft
--- a/tests/test-clone.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-clone.t Mon Feb 12 16:22:47 2024 +0100 @@ -71,22 +71,42 @@ #if hardlink $ hg --debug clone -U . ../c --config progress.debug=true - linking: 1/15 files (6.67%) - linking: 2/15 files (13.33%) - linking: 3/15 files (20.00%) - linking: 4/15 files (26.67%) - linking: 5/15 files (33.33%) - linking: 6/15 files (40.00%) - linking: 7/15 files (46.67%) - linking: 8/15 files (53.33%) - linking: 9/15 files (60.00%) - linking: 10/15 files (66.67%) - linking: 11/15 files (73.33%) - linking: 12/15 files (80.00%) - linking: 13/15 files (86.67%) - linking: 14/15 files (93.33%) - linking: 15/15 files (100.00%) - linked 15 files + linking: 1/16 files (6.25%) (no-rust !) + linking: 2/16 files (12.50%) (no-rust !) + linking: 3/16 files (18.75%) (no-rust !) + linking: 4/16 files (25.00%) (no-rust !) + linking: 5/16 files (31.25%) (no-rust !) + linking: 6/16 files (37.50%) (no-rust !) + linking: 7/16 files (43.75%) (no-rust !) + linking: 8/16 files (50.00%) (no-rust !) + linking: 9/16 files (56.25%) (no-rust !) + linking: 10/16 files (62.50%) (no-rust !) + linking: 11/16 files (68.75%) (no-rust !) + linking: 12/16 files (75.00%) (no-rust !) + linking: 13/16 files (81.25%) (no-rust !) + linking: 14/16 files (87.50%) (no-rust !) + linking: 15/16 files (93.75%) (no-rust !) + linking: 16/16 files (100.00%) (no-rust !) + linked 16 files (no-rust !) + linking: 1/18 files (5.56%) (rust !) + linking: 2/18 files (11.11%) (rust !) + linking: 3/18 files (16.67%) (rust !) + linking: 4/18 files (22.22%) (rust !) + linking: 5/18 files (27.78%) (rust !) + linking: 6/18 files (33.33%) (rust !) + linking: 7/18 files (38.89%) (rust !) + linking: 8/18 files (44.44%) (rust !) + linking: 9/18 files (50.00%) (rust !) + linking: 10/18 files (55.56%) (rust !) + linking: 11/18 files (61.11%) (rust !) + linking: 12/18 files (66.67%) (rust !) + linking: 13/18 files (72.22%) (rust !) + linking: 14/18 files (77.78%) (rust !) + linking: 15/18 files (83.33%) (rust !) + linking: 16/18 files (88.89%) (rust !) + linking: 17/18 files (94.44%) (rust !) + linking: 18/18 files (100.00%) (rust !) + linked 18 files (rust !) updating the branch cache #else $ hg --debug clone -U . ../c --config progress.debug=true
--- a/tests/test-clonebundles.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-clonebundles.t Mon Feb 12 16:22:47 2024 +0100 @@ -379,7 +379,8 @@ Stream clone bundles are supported $ hg -R server debugcreatestreamclonebundle packed.hg - writing 613 bytes for 4 files + writing 613 bytes for 5 files (no-rust !) + writing 739 bytes for 7 files (rust !) bundle requirements: generaldelta, revlogv1, sparserevlog (no-rust no-zstd !) bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog (no-rust zstd !) bundle requirements: generaldelta, revlog-compression-zstd, revlogv1, sparserevlog (rust !) @@ -392,8 +393,10 @@ $ hg clone -U http://localhost:$HGPORT stream-clone-no-spec applying clone bundle from http://localhost:$HGPORT1/packed.hg - 4 files to transfer, 613 bytes of data - transferred 613 bytes in *.* seconds (*) (glob) + 5 files to transfer, 613 bytes of data (no-rust !) + transferred 613 bytes in *.* seconds (*) (glob) (no-rust !) + 7 files to transfer, 739 bytes of data (rust !) + transferred 739 bytes in *.* seconds (*) (glob) (rust !) finished applying clone bundle searching for changes no changes found @@ -406,8 +409,10 @@ $ hg clone -U http://localhost:$HGPORT stream-clone-vanilla-spec applying clone bundle from http://localhost:$HGPORT1/packed.hg - 4 files to transfer, 613 bytes of data - transferred 613 bytes in *.* seconds (*) (glob) + 5 files to transfer, 613 bytes of data (no-rust !) + transferred 613 bytes in *.* seconds (*) (glob) (no-rust !) + 7 files to transfer, 739 bytes of data (rust !) + transferred 739 bytes in *.* seconds (*) (glob) (rust !) finished applying clone bundle searching for changes no changes found @@ -420,8 +425,10 @@ $ hg clone -U http://localhost:$HGPORT stream-clone-supported-requirements applying clone bundle from http://localhost:$HGPORT1/packed.hg - 4 files to transfer, 613 bytes of data - transferred 613 bytes in *.* seconds (*) (glob) + 5 files to transfer, 613 bytes of data (no-rust !) + transferred 613 bytes in *.* seconds (*) (glob) (no-rust !) + 7 files to transfer, 739 bytes of data (rust !) + transferred 739 bytes in *.* seconds (*) (glob) (rust !) finished applying clone bundle searching for changes no changes found @@ -567,8 +574,10 @@ no compatible clone bundles available on server; falling back to regular clone (you may want to report this to the server operator) streaming all changes - 9 files to transfer, 816 bytes of data - transferred 816 bytes in * seconds (*) (glob) + 10 files to transfer, 816 bytes of data (no-rust !) + transferred 816 bytes in * seconds (*) (glob) (no-rust !) + 12 files to transfer, 942 bytes of data (rust !) + transferred 942 bytes in *.* seconds (*) (glob) (rust !) A manifest with a stream clone but no BUNDLESPEC @@ -580,8 +589,10 @@ no compatible clone bundles available on server; falling back to regular clone (you may want to report this to the server operator) streaming all changes - 9 files to transfer, 816 bytes of data - transferred 816 bytes in * seconds (*) (glob) + 10 files to transfer, 816 bytes of data (no-rust !) + transferred 816 bytes in * seconds (*) (glob) (no-rust !) + 12 files to transfer, 942 bytes of data (rust !) + transferred 942 bytes in *.* seconds (*) (glob) (rust !) 
A manifest with a gzip bundle and a stream clone @@ -592,8 +603,10 @@ $ hg clone -U --stream http://localhost:$HGPORT uncompressed-gzip-packed applying clone bundle from http://localhost:$HGPORT1/packed.hg - 4 files to transfer, 613 bytes of data - transferred 613 bytes in * seconds (*) (glob) + 5 files to transfer, 613 bytes of data (no-rust !) + transferred 613 bytes in *.* seconds (*) (glob) (no-rust !) + 7 files to transfer, 739 bytes of data (rust !) + transferred 739 bytes in *.* seconds (*) (glob) (rust !) finished applying clone bundle searching for changes no changes found @@ -607,8 +620,10 @@ $ hg clone -U --stream http://localhost:$HGPORT uncompressed-gzip-packed-requirements applying clone bundle from http://localhost:$HGPORT1/packed.hg - 4 files to transfer, 613 bytes of data - transferred 613 bytes in * seconds (*) (glob) + 5 files to transfer, 613 bytes of data (no-rust !) + transferred 613 bytes in *.* seconds (*) (glob) (no-rust !) + 7 files to transfer, 739 bytes of data (rust !) + transferred 739 bytes in *.* seconds (*) (glob) (rust !) finished applying clone bundle searching for changes no changes found @@ -624,8 +639,10 @@ no compatible clone bundles available on server; falling back to regular clone (you may want to report this to the server operator) streaming all changes - 9 files to transfer, 816 bytes of data - transferred 816 bytes in * seconds (*) (glob) + 10 files to transfer, 816 bytes of data (no-rust !) + transferred 816 bytes in * seconds (*) (glob) (no-rust !) + 12 files to transfer, 942 bytes of data (rust !) + transferred 942 bytes in *.* seconds (*) (glob) (rust !) Test clone bundle retrieved through bundle2
--- a/tests/test-debug-rebuild-dirstate.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-debug-rebuild-dirstate.t Mon Feb 12 16:22:47 2024 +0100 @@ -28,12 +28,12 @@ $ f --size .hg/dirstate* .hg/dirstate: size=133 - .hg/dirstate.b870a51b: size=511 - $ hg debugrebuilddirstate - $ f --size .hg/dirstate* - .hg/dirstate: size=133 .hg/dirstate.88698448: size=511 $ hg debugrebuilddirstate $ f --size .hg/dirstate* .hg/dirstate: size=133 .hg/dirstate.6b8ab34b: size=511 + $ hg debugrebuilddirstate + $ f --size .hg/dirstate* + .hg/dirstate: size=133 + .hg/dirstate.b875dfc5: size=511
--- a/tests/test-debugcommands.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-debugcommands.t Mon Feb 12 16:22:47 2024 +0100 @@ -17,7 +17,7 @@ #if reporevlogstore $ hg debugrevlog -c format : 1 - flags : inline + flags : (none) revisions : 3 merges : 0 ( 0.00%) @@ -185,12 +185,12 @@ debugdelta chain basic output -#if reporevlogstore pure +#if reporevlogstore pure rust $ hg debugindexstats - abort: debugindexstats only works with native code + abort: debugindexstats only works with native C code [255] #endif -#if reporevlogstore no-pure +#if reporevlogstore no-pure no-rust $ hg debugindexstats node trie capacity: 4 node trie count: 2
--- a/tests/test-eol-clone.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-eol-clone.t Mon Feb 12 16:22:47 2024 +0100 @@ -80,7 +80,8 @@ [patterns] **.txt = native $ hg clone repo repo-3 -v --debug - linked 7 files + linked 8 files (no-rust !) + linked 10 files (rust !) updating to branch default resolving manifests branchmerge: False, force: False, partial: False
--- a/tests/test-flagprocessor.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-flagprocessor.t Mon Feb 12 16:22:47 2024 +0100 @@ -1,3 +1,7 @@ +# Rust index does not support creating new flags dynamically + +#if no-rust + # Create server $ hg init server $ cd server @@ -304,3 +308,6 @@ $ hg status $ hg diff + + +#endif
--- a/tests/test-fncache.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-fncache.t Mon Feb 12 16:22:47 2024 +0100 @@ -88,7 +88,10 @@ adding tst.d/foo $ find .hg | sort .hg + .hg/00changelog-6b8ab34b.nd (rust !) + .hg/00changelog.d .hg/00changelog.i + .hg/00changelog.n (rust !) .hg/00manifest.i .hg/branch .hg/cache @@ -135,7 +138,10 @@ .hg/last-message.txt .hg/requires .hg/store + .hg/store/00changelog-b875dfc5.nd (rust !) + .hg/store/00changelog.d .hg/store/00changelog.i + .hg/store/00changelog.n (rust !) .hg/store/00manifest.i .hg/store/data .hg/store/data/tst.d.hg
--- a/tests/test-generaldelta.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-generaldelta.t Mon Feb 12 16:22:47 2024 +0100 @@ -271,7 +271,7 @@ 51 17 -1 4 3 50 prev 3?? 5?? 6?? 1.0???? 6?? 0 0.00000 (glob) 52 51 -1 4 4 51 p1 58 640 6?? 1.0???? 6?? 0 0.00000 (glob) 53 52 -1 5 1 -1 base 0 0 0 0.00000 0 0 0.00000 - 54 53 -1 6 1 -1 base 3?? 640 3?? 0.5???? 3?? 0 0.00000 (glob) + 54 53 -1 5 2 53 p1 3?? 640 3?? 0.5???? 3?? 0 0.00000 (glob) $ hg clone --pull source-repo --config experimental.maxdeltachainspan=2800 relax-chain --config format.generaldelta=yes requesting all changes adding changesets
--- a/tests/test-hardlinks.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-hardlinks.t Mon Feb 12 16:22:47 2024 +0100 @@ -44,7 +44,10 @@ $ cd ../.. $ nlinksdir r1/.hg/store + 1 r1/.hg/store/00changelog-b870a51b.nd (rust !) + 1 r1/.hg/store/00changelog.d 1 r1/.hg/store/00changelog.i + 1 r1/.hg/store/00changelog.n (rust !) 1 r1/.hg/store/00manifest.i 1 r1/.hg/store/data/d1/f2.i 1 r1/.hg/store/data/f1.i @@ -52,6 +55,7 @@ 1 r1/.hg/store/phaseroots 1 r1/.hg/store/requires 1 r1/.hg/store/undo + 1 r1/.hg/store/undo.backup.00changelog.n.bck (rust !) 1 r1/.hg/store/undo.backup.fncache.bck (repofncache !) 1 r1/.hg/store/undo.backupfiles @@ -59,14 +63,26 @@ Create hardlinked clone r2: $ hg clone -U --debug r1 r2 --config progress.debug=true - linking: 1/7 files (14.29%) - linking: 2/7 files (28.57%) - linking: 3/7 files (42.86%) - linking: 4/7 files (57.14%) - linking: 5/7 files (71.43%) - linking: 6/7 files (85.71%) - linking: 7/7 files (100.00%) - linked 7 files + linking: 1/8 files (12.50%) (no-rust !) + linking: 2/8 files (25.00%) (no-rust !) + linking: 3/8 files (37.50%) (no-rust !) + linking: 4/8 files (50.00%) (no-rust !) + linking: 5/8 files (62.50%) (no-rust !) + linking: 6/8 files (75.00%) (no-rust !) + linking: 7/8 files (87.50%) (no-rust !) + linking: 8/8 files (100.00%) (no-rust !) + linked 8 files (no-rust !) + linking: 1/10 files (10.00%) (rust !) + linking: 2/10 files (20.00%) (rust !) + linking: 3/10 files (30.00%) (rust !) + linking: 4/10 files (40.00%) (rust !) + linking: 5/10 files (50.00%) (rust !) + linking: 6/10 files (60.00%) (rust !) + linking: 7/10 files (70.00%) (rust !) + linking: 8/10 files (80.00%) (rust !) + linking: 9/10 files (90.00%) (rust !) + linking: 10/10 files (100.00%) (rust !) + linked 10 files (rust !) updating the branch cache Create non-hardlinked clone r3: @@ -85,7 +101,10 @@ Repos r1 and r2 should now contain hardlinked files: $ nlinksdir r1/.hg/store + 1 r1/.hg/store/00changelog-b870a51b.nd (rust !) + 2 r1/.hg/store/00changelog.d 2 r1/.hg/store/00changelog.i + 1 r1/.hg/store/00changelog.n (rust !) 2 r1/.hg/store/00manifest.i 2 r1/.hg/store/data/d1/f2.i 2 r1/.hg/store/data/f1.i @@ -93,11 +112,15 @@ 1 r1/.hg/store/phaseroots 1 r1/.hg/store/requires 1 r1/.hg/store/undo + 1 r1/.hg/store/undo.backup.00changelog.n.bck (rust !) 1 r1/.hg/store/undo.backup.fncache.bck (repofncache !) 1 r1/.hg/store/undo.backupfiles $ nlinksdir r2/.hg/store + 1 r2/.hg/store/00changelog-b870a51b.nd (rust !) + 2 r2/.hg/store/00changelog.d 2 r2/.hg/store/00changelog.i + 1 r2/.hg/store/00changelog.n (rust !) 2 r2/.hg/store/00manifest.i 2 r2/.hg/store/data/d1/f2.i 2 r2/.hg/store/data/f1.i @@ -107,7 +130,10 @@ Repo r3 should not be hardlinked: $ nlinksdir r3/.hg/store + 1 r3/.hg/store/00changelog-88698448.nd (rust !) + 1 r3/.hg/store/00changelog.d 1 r3/.hg/store/00changelog.i + 1 r3/.hg/store/00changelog.n (rust !) 1 r3/.hg/store/00manifest.i 1 r3/.hg/store/data/d1/f2.i 1 r3/.hg/store/data/f1.i @@ -132,7 +158,10 @@ $ cd ../.. $ nlinksdir r3/.hg/store + 1 r3/.hg/store/00changelog-ea337809.nd (rust !) + 1 r3/.hg/store/00changelog.d 1 r3/.hg/store/00changelog.i + 1 r3/.hg/store/00changelog.n (rust !) 1 r3/.hg/store/00manifest.i 1 r3/.hg/store/data/d1/f2.d 1 r3/.hg/store/data/d1/f2.i @@ -141,6 +170,7 @@ 1 r3/.hg/store/phaseroots 1 r3/.hg/store/requires 1 r3/.hg/store/undo + 1 r3/.hg/store/undo.backup.00changelog.n.bck (rust !) 1 r3/.hg/store/undo.backupfiles Push to repo r1 should break up most hardlinks in r2: @@ -159,7 +189,10 @@ $ cd .. 
$ nlinksdir r2/.hg/store + 1 r2/.hg/store/00changelog-b870a51b.nd (rust !) + 1 r2/.hg/store/00changelog.d 1 r2/.hg/store/00changelog.i + 1 r2/.hg/store/00changelog.n (rust !) 1 r2/.hg/store/00manifest.i 1 r2/.hg/store/data/d1/f2.i 2 r2/.hg/store/data/f1.i @@ -184,7 +217,10 @@ $ cd .. $ nlinksdir r2/.hg/store + 1 r2/.hg/store/00changelog-b870a51b.nd (rust !) + 1 r2/.hg/store/00changelog.d 1 r2/.hg/store/00changelog.i + 1 r2/.hg/store/00changelog.n (rust !) 1 r2/.hg/store/00manifest.i 1 r2/.hg/store/data/d1/f2.i 1 r2/.hg/store/data/f1.i @@ -241,7 +277,10 @@ 2 r4/.hg/hgrc 2 r4/.hg/last-message.txt 2 r4/.hg/requires + 2 r4/.hg/store/00changelog-7f2eb713.nd (rust !) + 2 r4/.hg/store/00changelog.d 2 r4/.hg/store/00changelog.i + 2 r4/.hg/store/00changelog.n (rust !) 2 r4/.hg/store/00manifest.i 2 r4/.hg/store/data/d1/f2.d 2 r4/.hg/store/data/d1/f2.i @@ -251,6 +290,7 @@ 2 r4/.hg/store/phaseroots 2 r4/.hg/store/requires 2 r4/.hg/store/undo + 2 r4/.hg/store/undo.backup.00changelog.n.bck (rust !) 2 r4/.hg/store/undo.backupfiles [24] r4/.hg/undo.backup.branch.bck (re) 2 r4/\.hg/undo\.backup\.dirstate.bck (re) @@ -294,7 +334,10 @@ 2 r4/.hg/hgrc 2 r4/.hg/last-message.txt 2 r4/.hg/requires + 2 r4/.hg/store/00changelog-7f2eb713.nd (rust !) + 2 r4/.hg/store/00changelog.d 2 r4/.hg/store/00changelog.i + 2 r4/.hg/store/00changelog.n (rust !) 2 r4/.hg/store/00manifest.i 2 r4/.hg/store/data/d1/f2.d 2 r4/.hg/store/data/d1/f2.i @@ -304,6 +347,7 @@ 2 r4/.hg/store/phaseroots 2 r4/.hg/store/requires 2 r4/.hg/store/undo + 2 r4/.hg/store/undo.backup.00changelog.n.bck (rust !) 2 r4/.hg/store/undo.backupfiles [23] r4/.hg/undo.backup.branch.bck (re) 2 r4/\.hg/undo\.backup\.dirstate.bck (re)
--- a/tests/test-hook.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-hook.t Mon Feb 12 16:22:47 2024 +0100 @@ -458,14 +458,18 @@ (Check that no 'changelog.i.a' file were left behind) $ ls -1 .hg/store/ + 00changelog-1335303a.nd (rust !) + 00changelog.d 00changelog.i + 00changelog.n (rust !) 00manifest.i data - fncache (repofncache !) + fncache phaseroots requires undo - undo.backup.fncache.bck (repofncache !) + undo.backup.00changelog.n.bck (rust !) + undo.backup.fncache.bck undo.backupfiles
--- a/tests/test-http-bad-server.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-http-bad-server.t Mon Feb 12 16:22:47 2024 +0100 @@ -725,6 +725,8 @@ $ hg clone http://localhost:$HGPORT/ clone requesting all changes adding changesets + transaction abort! + rollback completed abort: HTTP request error (incomplete response) (this may be an intermittent network failure; if the error persists, consider contacting the network or server operator) [255] @@ -757,6 +759,8 @@ $ hg clone http://localhost:$HGPORT/ clone requesting all changes adding changesets + transaction abort! + rollback completed abort: HTTP request error (incomplete response*) (glob) (this may be an intermittent network failure; if the error persists, consider contacting the network or server operator) [255] @@ -791,6 +795,8 @@ $ hg clone http://localhost:$HGPORT/ clone requesting all changes adding changesets + transaction abort! + rollback completed abort: HTTP request error (incomplete response) (this may be an intermittent network failure; if the error persists, consider contacting the network or server operator) [255]
--- a/tests/test-http-bundle1.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-http-bundle1.t Mon Feb 12 16:22:47 2024 +0100 @@ -38,8 +38,9 @@ #if no-reposimplestore $ hg clone --stream http://localhost:$HGPORT/ copy 2>&1 streaming all changes - 6 files to transfer, 606 bytes of data (no-zstd !) - 6 files to transfer, 608 bytes of data (zstd !) + 7 files to transfer, 606 bytes of data (no-zstd !) + 7 files to transfer, 608 bytes of data (zstd no-rust !) + 9 files to transfer, 734 bytes of data (zstd rust !) transferred * bytes in * seconds (*/sec) (glob) searching for changes no changes found @@ -218,9 +219,10 @@ #if no-reposimplestore $ hg clone http://user:pass@localhost:$HGPORT2/ dest 2>&1 streaming all changes - 7 files to transfer, 916 bytes of data (no-zstd !) - 7 files to transfer, 919 bytes of data (zstd !) - transferred * bytes in * seconds (*/sec) (glob) + 8 files to transfer, 916 bytes of data (no-zstd !) + 8 files to transfer, 919 bytes of data (zstd no-rust !) + 10 files to transfer, 1.02 KB of data (zstd rust !) + transferred * in * seconds (*/sec) (glob) searching for changes no changes found updating to branch default @@ -378,7 +380,8 @@ streaming all changes * files to transfer, * of data (glob) transferred 1.36 KB in * seconds (* */sec) (glob) (no-zstd !) - transferred 1.38 KB in * seconds (* */sec) (glob) (zstd !) + transferred 1.38 KB in * seconds (* */sec) (glob) (zstd no-rust !) + transferred 1.56 KB in * seconds (* */sec) (glob) (zstd rust !) searching for changes no changes found #endif
--- a/tests/test-http-proxy.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-http-proxy.t Mon Feb 12 16:22:47 2024 +0100 @@ -16,7 +16,8 @@ $ http_proxy=http://localhost:$HGPORT1/ hg --config http_proxy.always=True clone --stream http://localhost:$HGPORT/ b streaming all changes - 6 files to transfer, 412 bytes of data (reporevlogstore !) + 7 files to transfer, 412 bytes of data (reporevlogstore no-rust !) + 9 files to transfer, 538 bytes of data (reporevlogstore rust !) 4 files to transfer, 330 bytes of data (reposimplestore !) transferred * bytes in * seconds (*/sec) (glob) updating to branch default
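The pattern here repeats across every stream-clone expectation in this series: no-rust gains one file at an unchanged byte count (the inline changelog split into 00changelog.i plus 00changelog.d), while rust gains two more files and exactly 126 bytes, the persistent-nodemap docket and data file that the test-stream-bundle-v2.t hunk later itemizes as 00changelog.n (62 bytes) and 00changelog-*.nd (64 bytes). Checking the numbers above:

    # figures taken from the test-http-proxy.t expectation
    base_files, base_bytes = 7, 412        # reporevlogstore no-rust
    extra = [62, 64]                       # 00changelog.n and 00changelog-*.nd
    assert base_files + len(extra) == 9
    assert base_bytes + sum(extra) == 538  # reporevlogstore rust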
--- a/tests/test-http.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-http.t Mon Feb 12 16:22:47 2024 +0100 @@ -29,8 +29,9 @@ #if no-reposimplestore $ hg clone --stream http://localhost:$HGPORT/ copy 2>&1 streaming all changes - 9 files to transfer, 715 bytes of data (no-zstd !) - 9 files to transfer, 717 bytes of data (zstd !) + 10 files to transfer, 715 bytes of data (no-zstd !) + 10 files to transfer, 717 bytes of data (zstd no-rust !) + 12 files to transfer, 843 bytes of data (zstd rust !) transferred * bytes in * seconds (*/sec) (glob) updating to branch default 4 files updated, 0 files merged, 0 files removed, 0 files unresolved @@ -254,7 +255,8 @@ #if no-reposimplestore $ hg clone http://user:pass@localhost:$HGPORT2/ dest 2>&1 streaming all changes - 10 files to transfer, 1.01 KB of data + 11 files to transfer, 1.01 KB of data (no-rust !) + 13 files to transfer, 1.13 KB of data (rust !) transferred * KB in * seconds (*/sec) (glob) updating to branch default 5 files updated, 0 files merged, 0 files removed, 0 files unresolved
--- a/tests/test-inherit-mode.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-inherit-mode.t Mon Feb 12 16:22:47 2024 +0100 @@ -78,7 +78,10 @@ 00660 ./.hg/last-message.txt 00600 ./.hg/requires 00770 ./.hg/store/ + 00660 ./.hg/store/00changelog-150e1cfc.nd (rust !) + 00660 ./.hg/store/00changelog.d 00660 ./.hg/store/00changelog.i + 00660 ./.hg/store/00changelog.n (rust !) 00660 ./.hg/store/00manifest.i 00770 ./.hg/store/data/ 00770 ./.hg/store/data/dir/ @@ -137,7 +140,10 @@ 00660 ../push/.hg/cache/rbc-revs-v1 00660 ../push/.hg/requires 00770 ../push/.hg/store/ + 00660 ../push/.hg/store/00changelog-b870a51b.nd (rust !) + 00660 ../push/.hg/store/00changelog.d 00660 ../push/.hg/store/00changelog.i + 00660 ../push/.hg/store/00changelog.n (rust !) 00660 ../push/.hg/store/00manifest.i 00770 ../push/.hg/store/data/ 00770 ../push/.hg/store/data/dir/
--- a/tests/test-issue6528.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-issue6528.t Mon Feb 12 16:22:47 2024 +0100 @@ -367,7 +367,7 @@ $ cd repo-to-fix-not-inline $ tar -xf - < "$TESTDIR"/bundles/issue6528.tar $ echo b >> b.txt - $ hg commit -qm "inline -> separate" + $ hg commit -qm "inline -> separate" --traceback $ find .hg -name *b.txt.d .hg/store/data/b.txt.d
--- a/tests/test-journal-exists.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-journal-exists.t Mon Feb 12 16:22:47 2024 +0100 @@ -47,6 +47,8 @@ $ hg -R foo unbundle repo.hg adding changesets + transaction abort! + rollback completed abort: $EACCES$: '$TESTTMP/repo/foo/.hg/store/.00changelog.i-*' (glob) [255]
--- a/tests/test-lfs-test-server.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-lfs-test-server.t Mon Feb 12 16:22:47 2024 +0100 @@ -908,7 +908,8 @@ $ cd $TESTTMP $ hg --debug clone test test2 http auth: user foo, password *** - linked 6 files + linked 7 files (no-rust !) + linked 9 files (rust !) http auth: user foo, password *** updating to branch default resolving manifests
--- a/tests/test-log.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-log.t Mon Feb 12 16:22:47 2024 +0100 @@ -2001,6 +2001,8 @@ @@ -0,0 +1,1 @@ +b + $ hg log -r 3 -T'{diffstat}\n' + 2: +2/-1 Test that diff.merge is respected (file b was added on one side and and therefore merged cleanly) @@ -2021,6 +2023,9 @@ -b +c + $ hg log -r 3 -T'{diffstat}\n' --config diff.merge=yes + 1: +1/-1 + $ cd .. 'hg log -r rev fn' when last(filelog(fn)) != rev
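The {diffstat} keyword renders "N: +a/-r": files changed, lines added, lines removed. With diff.merge=yes a merge revision is diffed against the automatic merge of its parents, so only the change the merge commit itself introduced remains, collapsing "2: +2/-1" to "1: +1/-1". A rough sketch of the summary it computes over a unified diff (an approximation, not Mercurial's implementation):

    def diffstat(diff_text):
        files = adds = removes = 0
        for line in diff_text.splitlines():
            if line.startswith('diff '):
                files += 1
            elif line.startswith('+') and not line.startswith('+++'):
                adds += 1
            elif line.startswith('-') and not line.startswith('---'):
                removes += 1
        return '%d: +%d/-%d' % (files, adds, removes)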
--- a/tests/test-narrow-clone-stream.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-narrow-clone-stream.t Mon Feb 12 16:22:47 2024 +0100 @@ -80,7 +80,10 @@ Making sure store has the required files $ ls .hg/store/ + 00changelog-????????.nd (glob) (rust !) + 00changelog.d 00changelog.i + 00changelog.n (rust !) 00manifest.i data fncache (tree !)
--- a/tests/test-narrow.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-narrow.t Mon Feb 12 16:22:47 2024 +0100 @@ -544,3 +544,17 @@ deleting meta/d0/00manifest.i (tree !) deleting unwanted files from working copy not deleting possibly dirty file d0/f + + +Test removing `rootfilesin:` include + $ hg clone --narrow ssh://user@dummy/master narrow-concurrent-modify -q \ + > --include rootfilesin:d0 --include rootfilesin:d1 + $ cd narrow-concurrent-modify + $ hg --config 'hooks.pretxnopen = echo modified >> d0/f' tracked --removeinclude rootfilesin:d0 + comparing with ssh://user@dummy/master + searching for changes + looking for local changes to affected paths + deleting data/d0/f.i + deleting meta/d0/00manifest.i (tree !) + deleting unwanted files from working copy + not deleting possibly dirty file d0/f
--- a/tests/test-parseindex.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-parseindex.t Mon Feb 12 16:22:47 2024 +0100 @@ -145,8 +145,13 @@ > ] > for n, p in poisons: > # corrupt p1 at rev0 and p2 at rev1 - > d = data[:24] + p + data[28:127 + 28] + p + data[127 + 32:] - > open(n + b"/.hg/store/00changelog.i", "wb").write(d) + > rev_0 = data[:64] + > rev_1 = data[64:] + > altered_rev_0 = rev_0[:24] + p + rev_0[24 + 4:] + > altered_rev_1 = rev_1[:28] + p + rev_1[28 + 4:] + > new_data = altered_rev_0 + altered_rev_1 + > with open(n + b"/.hg/store/00changelog.i", "wb") as f: + > f.write(new_data) > EOF $ hg -R limit debugrevlogindex -f1 -c
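The rewritten helper leans on the revlog v1 index layout: entries are a fixed 64 bytes, with p1 at byte offset 24 and p2 at offset 28 of each entry, so rev 0 is data[:64] and rev 1 is data[64:], and the old arithmetic that mixed in inline-data offsets is gone. A sketch of that layout (the struct format below mirrors the v1 format Mercurial uses):

    import struct

    # offset/flags (8) + comp. length, uncomp. length, delta base, linkrev,
    # p1, p2 (4 bytes each) + 20-byte node + 12 bytes padding = 64 bytes
    INDEX_ENTRY_V1 = struct.Struct(">Qiiiiii20s12x")
    assert INDEX_ENTRY_V1.size == 64

    def parents(entry):
        fields = INDEX_ENTRY_V1.unpack(entry)
        return fields[5], fields[6]  # p1 and p2 revision numbers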
--- a/tests/test-racy-mutations.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-racy-mutations.t Mon Feb 12 16:22:47 2024 +0100 @@ -110,7 +110,7 @@ note: use 'hg commit --logfile .hg/last-message.txt --edit' to reuse it transaction abort! rollback completed - abort: 00changelog.i: file cursor at position 249, expected 121 + abort: 00changelog.i: file cursor at position 128, expected 64 And no corruption in the changelog. $ hg debugrevlogindex -c rev linkrev nodeid p1 p2
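The new expected offsets follow from the changelog no longer being inline: 00changelog.i now contains only fixed 64-byte index entries, so the racing writer expects the cursor after one revision at 64 and finds it at 128 (two revisions), where the old inline file interleaved revision data with the index (121 and 249).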
--- a/tests/test-relink.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-relink.t Mon Feb 12 16:22:47 2024 +0100 @@ -80,17 +80,19 @@ $ hg relink --debug --config progress.debug=true | fix_path relinking $TESTTMP/repo/.hg/store to $TESTTMP/clone/.hg/store tip has 2 files, estimated total number of files: 3 - collecting: 00changelog.i 1/3 files (33.33%) - collecting: 00manifest.i 2/3 files (66.67%) - collecting: a.i 3/3 files (100.00%) - collecting: b.i 4/3 files (133.33%) - collecting: dummy.i 5/3 files (166.67%) - collected 5 candidate storage files + collecting: 00changelog.d 1/3 files (33.33%) + collecting: 00changelog.i 2/3 files (66.67%) + collecting: 00manifest.i 3/3 files (100.00%) + collecting: a.i 4/3 files (133.33%) + collecting: b.i 5/3 files (166.67%) + collecting: dummy.i 6/3 files (200.00%) + collected 6 candidate storage files + not linkable: 00changelog.d not linkable: 00changelog.i not linkable: 00manifest.i - pruning: data/a.i 3/5 files (60.00%) + pruning: data/a.i 4/6 files (66.67%) not linkable: data/b.i - pruning: data/dummy.i 5/5 files (100.00%) + pruning: data/dummy.i 6/6 files (100.00%) pruned down to 2 probably relinkable files relinking: data/a.i 1/2 files (50.00%) not linkable: data/dummy.i
--- a/tests/test-remotefilelog-bgprefetch.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-bgprefetch.t Mon Feb 12 16:22:47 2024 +0100 @@ -29,10 +29,12 @@ $ hgcloneshallow ssh://user@dummy/master shallow --noupdate streaming all changes - 2 files to transfer, 776 bytes of data (no-zstd !) + 3 files to transfer, 776 bytes of data (no-zstd !) transferred 776 bytes in * seconds (*/sec) (glob) (no-zstd !) - 2 files to transfer, 784 bytes of data (zstd !) - transferred 784 bytes in * seconds (* */sec) (glob) (zstd !) + 3 files to transfer, 784 bytes of data (zstd no-rust !) + transferred 784 bytes in * seconds (*/sec) (glob) (zstd no-rust !) + 5 files to transfer, 910 bytes of data (rust !) + transferred 910 bytes in * seconds (*/sec) (glob) (rust !) searching for changes no changes found
--- a/tests/test-remotefilelog-clone-tree.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-clone-tree.t Mon Feb 12 16:22:47 2024 +0100 @@ -20,8 +20,10 @@ $ hgcloneshallow ssh://user@dummy/master shallow --noupdate streaming all changes - 4 files to transfer, 449 bytes of data - transferred 449 bytes in * seconds (*/sec) (glob) + 5 files to transfer, 449 bytes of data (no-rust !) + transferred 449 bytes in * seconds (*/sec) (glob) (no-rust !) + 7 files to transfer, 575 bytes of data (rust !) + transferred 575 bytes in *.* seconds (*) (glob) (rust !) searching for changes no changes found $ cd shallow @@ -65,8 +67,10 @@ $ hgcloneshallow ssh://user@dummy/shallow shallow2 --noupdate streaming all changes - 5 files to transfer, 1008 bytes of data - transferred 1008 bytes in * seconds (*/sec) (glob) + 6 files to transfer, 1008 bytes of data (no-rust !) + transferred 1008 bytes in * seconds (*/sec) (glob) (no-rust !) + 8 files to transfer, 1.11 KB of data (rust !) + transferred 1.11 KB in * seconds (* */sec) (glob) (rust !) searching for changes no changes found $ cd shallow2
--- a/tests/test-remotefilelog-clone.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-clone.t Mon Feb 12 16:22:47 2024 +0100 @@ -17,8 +17,10 @@ $ hgcloneshallow ssh://user@dummy/master shallow --noupdate streaming all changes - 2 files to transfer, 227 bytes of data - transferred 227 bytes in * seconds (*/sec) (glob) + 3 files to transfer, 227 bytes of data (no-rust !) + transferred 227 bytes in * seconds (*/sec) (glob) (no-rust !) + 5 files to transfer, 353 bytes of data (rust !) + transferred 353 bytes in *.* seconds (*) (glob) (rust !) searching for changes no changes found $ cd shallow @@ -55,8 +57,10 @@ $ hgcloneshallow ssh://user@dummy/shallow shallow2 --noupdate streaming all changes - 3 files to transfer, 564 bytes of data - transferred 564 bytes in * seconds (*/sec) (glob) + 4 files to transfer, 564 bytes of data (no-rust !) + transferred 564 bytes in * seconds (*/sec) (glob) (no-rust !) + 6 files to transfer, 690 bytes of data (rust !) + transferred 690 bytes in * seconds (*/sec) (glob) (rust !) searching for changes no changes found $ cd shallow2
--- a/tests/test-remotefilelog-log.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-log.t Mon Feb 12 16:22:47 2024 +0100 @@ -20,8 +20,10 @@ $ hgcloneshallow ssh://user@dummy/master shallow --noupdate streaming all changes - 2 files to transfer, 473 bytes of data - transferred 473 bytes in * seconds (*/sec) (glob) + 3 files to transfer, 473 bytes of data (no-rust !) + transferred 473 bytes in * seconds (*/sec) (glob) (no-rust !) + 5 files to transfer, 599 bytes of data (rust !) + transferred 599 bytes in * seconds (*/sec) (glob) (rust !) searching for changes no changes found $ cd shallow
--- a/tests/test-remotefilelog-partial-shallow.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-partial-shallow.t Mon Feb 12 16:22:47 2024 +0100 @@ -18,10 +18,12 @@ $ hg clone --shallow ssh://user@dummy/master shallow --noupdate --config remotefilelog.includepattern=foo streaming all changes - 3 files to transfer, 336 bytes of data (no-zstd !) + 4 files to transfer, 336 bytes of data (no-zstd !) transferred 336 bytes in * seconds (* */sec) (glob) (no-zstd !) - 3 files to transfer, 338 bytes of data (zstd !) - transferred 338 bytes in * seconds (* */sec) (glob) (zstd !) + 4 files to transfer, 338 bytes of data (zstd no-rust !) + transferred 338 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 6 files to transfer, 464 bytes of data (zstd rust !) + transferred 464 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found $ cat >> shallow/.hg/hgrc <<EOF
--- a/tests/test-remotefilelog-prefetch.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-prefetch.t Mon Feb 12 16:22:47 2024 +0100 @@ -22,10 +22,12 @@ $ hgcloneshallow ssh://user@dummy/master shallow --noupdate streaming all changes - 2 files to transfer, 528 bytes of data (no-zstd !) + 3 files to transfer, 528 bytes of data (no-zstd !) transferred 528 bytes in * seconds (* */sec) (glob) (no-zstd !) - 2 files to transfer, 532 bytes of data (zstd !) - transferred 532 bytes in * seconds (* */sec) (glob) (zstd !) + 3 files to transfer, 532 bytes of data (zstd no-rust !) + transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 5 files to transfer, 658 bytes of data (zstd rust !) + transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found $ cd shallow @@ -166,10 +168,12 @@ $ hgcloneshallow ssh://user@dummy/master shallow2 streaming all changes - 2 files to transfer, 528 bytes of data (no-zstd !) + 3 files to transfer, 528 bytes of data (no-zstd !) transferred 528 bytes in * seconds * (glob) (no-zstd !) - 2 files to transfer, 532 bytes of data (zstd !) - transferred 532 bytes in * seconds (* */sec) (glob) (zstd !) + 3 files to transfer, 532 bytes of data (zstd no-rust !) + transferred 532 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 5 files to transfer, 658 bytes of data (zstd rust !) + transferred 658 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found updating to branch default
--- a/tests/test-remotefilelog-sparse.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-sparse.t Mon Feb 12 16:22:47 2024 +0100 @@ -22,10 +22,12 @@ $ hgcloneshallow ssh://user@dummy/master shallow --noupdate streaming all changes - 2 files to transfer, 527 bytes of data (no-zstd !) + 3 files to transfer, 527 bytes of data (no-zstd !) transferred 527 bytes in * seconds (* */sec) (glob) (no-zstd !) - 2 files to transfer, 534 bytes of data (zstd !) - transferred 534 bytes in * seconds (* */sec) (glob) (zstd !) + 3 files to transfer, 534 bytes of data (zstd no-rust !) + transferred 534 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 5 files to transfer, 660 bytes of data (zstd rust !) + transferred 660 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found $ cd shallow @@ -75,10 +77,12 @@ $ hgcloneshallow ssh://user@dummy/master shallow2 streaming all changes - 2 files to transfer, 527 bytes of data (no-zstd !) + 3 files to transfer, 527 bytes of data (no-zstd !) transferred 527 bytes in * seconds (*) (glob) (no-zstd !) - 2 files to transfer, 534 bytes of data (zstd !) - transferred 534 bytes in * seconds (* */sec) (glob) (zstd !) + 3 files to transfer, 534 bytes of data (zstd no-rust !) + transferred 534 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 5 files to transfer, 660 bytes of data (zstd rust !) + transferred 660 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found updating to branch default
--- a/tests/test-remotefilelog-tags.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-tags.t Mon Feb 12 16:22:47 2024 +0100 @@ -18,10 +18,12 @@ $ hg clone --shallow ssh://user@dummy/master shallow --noupdate --config remotefilelog.excludepattern=.hgtags streaming all changes - 3 files to transfer, 662 bytes of data (no-zstd !) + 4 files to transfer, 662 bytes of data (no-zstd !) transferred 662 bytes in * seconds (* */sec) (glob) (no-zstd !) - 3 files to transfer, 665 bytes of data (zstd !) - transferred 665 bytes in * seconds (* */sec) (glob) (zstd !) + 4 files to transfer, 665 bytes of data (zstd no-rust !) + transferred 665 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 6 files to transfer, 791 bytes of data (zstd rust !) + transferred 791 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found $ cat >> shallow/.hg/hgrc <<EOF
--- a/tests/test-remotefilelog-wireproto.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-remotefilelog-wireproto.t Mon Feb 12 16:22:47 2024 +0100 @@ -25,8 +25,10 @@ $ hgcloneshallow ssh://user@dummy/master shallow --noupdate streaming all changes - 2 files to transfer, 908 bytes of data - transferred 908 bytes in * seconds (*/sec) (glob) + 3 files to transfer, 908 bytes of data (no-rust !) + transferred 908 bytes in * seconds (*/sec) (glob) (no-rust !) + 5 files to transfer, 1.01 KB of data (rust !) + transferred 1.01 KB in * seconds (* */sec) (glob) (rust !) searching for changes no changes found $ cd shallow
--- a/tests/test-repair-strip.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-repair-strip.t Mon Feb 12 16:22:47 2024 +0100 @@ -71,6 +71,7 @@ 2 warnings encountered! 2 integrity errors encountered! % journal contents + 00changelog.d 00changelog.i 00manifest.i data/b.i @@ -133,6 +134,7 @@ 7 integrity errors encountered! (first damaged changeset appears to be 3) % journal contents + 00changelog.d 00changelog.i 00manifest.i data/b.i
--- a/tests/test-revlog-mmapindex.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-revlog-mmapindex.t Mon Feb 12 16:22:47 2024 +0100 @@ -10,10 +10,10 @@ > ) > > def extsetup(ui): - > def mmapread(orig, fp): + > def mmapread(orig, fp, *args): > ui.write(b"mmapping %s\n" % pycompat.bytestr(fp.name)) > ui.flush() - > return orig(fp) + > return orig(fp, *args) > > extensions.wrapfunction(util, 'mmapread', mmapread) > EOF @@ -38,6 +38,7 @@ mmap index which is now more than 4k long $ hg log -l 5 -T '{rev}\n' --config experimental.mmapindexthreshold=4k mmapping $TESTTMP/a/.hg/store/00changelog.i + mmapping $TESTTMP/a/.hg/store/00changelog-????????.nd (glob) (rust !) 100 99 98 @@ -46,6 +47,7 @@ do not mmap index which is still less than 32k $ hg log -l 5 -T '{rev}\n' --config experimental.mmapindexthreshold=32k + mmapping $TESTTMP/a/.hg/store/00changelog-????????.nd (glob) (rust !) 100 99 98
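The wrapper grows *args because extensions.wrapfunction invokes wrappers as wrapper(orig, *args, **kwargs); spelling out the old argument list breaks as soon as the wrapped function gains a parameter, as util.mmapread does in this series. A forward-compatible version of the same test extension:

    from mercurial import extensions, pycompat, util

    def extsetup(ui):
        def mmapread(orig, fp, *args, **kwargs):
            ui.write(b"mmapping %s\n" % pycompat.bytestr(fp.name))
            ui.flush()
            # delegate everything, including arguments added later
            return orig(fp, *args, **kwargs)

        extensions.wrapfunction(util, 'mmapread', mmapread)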
--- a/tests/test-rust-ancestor.py Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-rust-ancestor.py Mon Feb 12 16:22:47 2024 +0100 @@ -36,7 +36,7 @@ 'The Rust or C version of the "parsers" module, which the "ancestor" module' ' relies on, is not available.', ) -class rustancestorstest(revlogtesting.RevlogBasedTestBase): +class rustancestorstest(revlogtesting.RustRevlogBasedTestBase): """Test the correctness of binding to Rust code. This test is merely for the binding to Rust itself: extraction of @@ -50,7 +50,7 @@ """ def testiteratorrevlist(self): - idx = self.parseindex() + idx = self.parserustindex() # checking test assumption about the index binary data: self.assertEqual( {i: (r[5], r[6]) for i, r in enumerate(idx)}, @@ -63,7 +63,7 @@ self.assertEqual([r for r in ait], [2, 1, 0]) def testlazyancestors(self): - idx = self.parseindex() + idx = self.parserustindex() start_count = sys.getrefcount(idx) # should be 2 (see Python doc) self.assertEqual( {i: (r[5], r[6]) for i, r in enumerate(idx)}, @@ -93,7 +93,7 @@ self.assertFalse(LazyAncestors(idx, [0], 0, False)) def testmissingancestors(self): - idx = self.parseindex() + idx = self.parserustindex() missanc = MissingAncestors(idx, [1]) self.assertTrue(missanc.hasbases()) self.assertEqual(missanc.missingancestors([3]), [2, 3]) @@ -103,14 +103,14 @@ self.assertEqual(missanc.basesheads(), {2}) def testmissingancestorsremove(self): - idx = self.parseindex() + idx = self.parserustindex() missanc = MissingAncestors(idx, [1]) revs = {0, 1, 2, 3} missanc.removeancestorsfrom(revs) self.assertEqual(revs, {2, 3}) def testrefcount(self): - idx = self.parseindex() + idx = self.parserustindex() start_count = sys.getrefcount(idx) # refcount increases upon iterator init... @@ -127,13 +127,17 @@ del idx self.assertEqual(list(ait), [3, 2, 1, 0]) + # the index is not tracked by the GC, hence there is nothing more + # we can assert to check that it is properly deleted once its refcount + # drops to 0 + def testgrapherror(self): data = ( revlogtesting.data_non_inlined[: 64 + 27] + b'\xf2' + revlogtesting.data_non_inlined[64 + 28 :] ) - idx = cparsers.parse_index2(data, False)[0] + idx = self.parserustindex(data=data) with self.assertRaises(rustext.GraphError) as arc: AncestorsIterator(idx, [1], -1, False) exc = arc.exception @@ -143,7 +147,7 @@ def testwdirunsupported(self): # trying to access ancestors of the working directory raises - idx = self.parseindex() + idx = self.parserustindex() with self.assertRaises(rustext.GraphError) as arc: list(AncestorsIterator(idx, [wdirrev], -1, False)) @@ -153,7 +157,7 @@ self.assertEqual(exc.args, ('InvalidRevision', wdirrev)) def testheadrevs(self): - idx = self.parseindex() + idx = self.parserustindex() self.assertEqual(dagop.headrevs(idx, [1, 2, 3]), {3})
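These tests observe ownership through CPython reference counts: sys.getrefcount exposes the live count, so an iterator retaining the index must raise it and releasing the iterator must restore the baseline. The closing comment added above marks the limit of the technique, since an object the GC does not track leaves nothing further to assert once its count reaches zero. The pattern in isolation:

    import sys

    obj = object()
    base = sys.getrefcount(obj)  # includes getrefcount's own argument ref
    holder = [obj]               # stands in for an iterator keeping obj alive
    assert sys.getrefcount(obj) == base + 1
    del holder
    assert sys.getrefcount(obj) == base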
--- a/tests/test-rust-discovery.py Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-rust-discovery.py Mon Feb 12 16:22:47 2024 +0100 @@ -1,6 +1,7 @@ import unittest from mercurial import policy +from mercurial.testing import revlog as revlogtesting PartialDiscovery = policy.importrust('discovery', member='PartialDiscovery') @@ -47,7 +48,7 @@ "rustext or the C Extension parsers module " "discovery relies on is not available", ) -class rustdiscoverytest(unittest.TestCase): +class rustdiscoverytest(revlogtesting.RustRevlogBasedTestBase): """Test the correctness of binding to Rust code. This test is merely for the binding to Rust itself: extraction of @@ -60,7 +61,7 @@ """ def parseindex(self): - return cparsers.parse_index2(data_non_inlined, False)[0] + return self.parserustindex(data=data_non_inlined) def repo(self): return fakerepo(self.parseindex())
--- a/tests/test-rust-revlog.py Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-rust-revlog.py Mon Feb 12 16:22:47 2024 +0100 @@ -1,5 +1,8 @@ +import struct import unittest +from mercurial.node import hex + try: from mercurial import rustext @@ -14,6 +17,8 @@ from mercurial.testing import revlog as revlogtesting +header = struct.unpack(">I", revlogtesting.data_non_inlined[:4])[0] + @unittest.skipIf( rustext is None, @@ -22,24 +27,16 @@ class RustRevlogIndexTest(revlogtesting.RevlogBasedTestBase): def test_heads(self): idx = self.parseindex() - rustidx = revlog.MixedIndex(idx) + rustidx = revlog.Index(revlogtesting.data_non_inlined, header) self.assertEqual(rustidx.headrevs(), idx.headrevs()) - def test_get_cindex(self): - # drop me once we no longer need the method for shortest node - idx = self.parseindex() - rustidx = revlog.MixedIndex(idx) - cidx = rustidx.get_cindex() - self.assertTrue(idx is cidx) - def test_len(self): idx = self.parseindex() - rustidx = revlog.MixedIndex(idx) + rustidx = revlog.Index(revlogtesting.data_non_inlined, header) self.assertEqual(len(rustidx), len(idx)) def test_ancestors(self): - idx = self.parseindex() - rustidx = revlog.MixedIndex(idx) + rustidx = revlog.Index(revlogtesting.data_non_inlined, header) lazy = LazyAncestors(rustidx, [3], 0, True) # we have two more references to the index: # - in its inner iterator for __contains__ and __bool__ @@ -51,7 +48,40 @@ self.assertEqual(list(lazy), [3, 2, 1, 0]) # let's check bool for an empty one - self.assertFalse(LazyAncestors(idx, [0], 0, False)) + self.assertFalse(LazyAncestors(rustidx, [0], 0, False)) + + +@unittest.skipIf( + rustext is None, + "rustext module revlog relies on is not available", +) +class RustRevlogNodeTreeClassTest(revlogtesting.RustRevlogBasedTestBase): + def test_standalone_nodetree(self): + idx = self.parserustindex() + nt = revlog.NodeTree(idx) + for i in range(4): + nt.insert(i) + + bin_nodes = [entry[7] for entry in idx] + hex_nodes = [hex(n) for n in bin_nodes] + + for i, node in enumerate(hex_nodes): + self.assertEqual(nt.prefix_rev_lookup(node), i) + self.assertEqual(nt.prefix_rev_lookup(node[:5]), i) + + # all 4 revisions in idx (standard data set) have different + # first nybbles in their Node IDs, + # hence `nt.shortest()` should return 1 for them, except when + # the leading nybble is 0 (ambiguity with NULL_NODE) + for i, (bin_node, hex_node) in enumerate(zip(bin_nodes, hex_nodes)): + shortest = nt.shortest(bin_node) + expected = 2 if hex_node[0] == ord('0') else 1 + self.assertEqual(shortest, expected) + self.assertEqual(nt.prefix_rev_lookup(hex_node[:shortest]), i) + + # test invalidation (generation poisoning) detection + del idx[3] + self.assertTrue(nt.is_invalidated()) if __name__ == '__main__':
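The module-level header matches what the new constructor expects: revlog.Index takes the raw index bytes plus the header word (revlog version and flags), stored big-endian in the first four bytes of v1 index data, replacing the removed MixedIndex wrapper around a parsed C index. The shortest() expectations then come straight from the data set: all four nodes differ in their first hex digit, so one digit suffices, except that a leading '0' is ambiguous with the null node and needs two. A condensed constructor helper under those assumptions:

    import struct

    def make_rust_index(revlog, data):
        # header word: revlog version + flags, big-endian first 4 bytes
        header = struct.unpack(">I", data[:4])[0]
        return revlog.Index(data, header)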
--- a/tests/test-sparse-revlog.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-sparse-revlog.t Mon Feb 12 16:22:47 2024 +0100 @@ -164,7 +164,21 @@ 4971 4970 -1 3 5 4930 snap $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 DBG-DELTAS-SEARCH: SEARCH rev=4971 - DBG-DELTAS-SEARCH: ROUND #1 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #1 - 3 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4329 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=13781 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=173765 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=106516 (BAD) + DBG-DELTAS-SEARCH: CANDIDATE: rev=4335 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=14385 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=171587 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=105187 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4962 DBG-DELTAS-SEARCH: type=snapshot-4 DBG-DELTAS-SEARCH: size=18296 @@ -172,7 +186,14 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=30377 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=16872 (BAD) - DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #2 - 2 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4325 + DBG-DELTAS-SEARCH: type=snapshot-3 + DBG-DELTAS-SEARCH: size=17384 + DBG-DELTAS-SEARCH: base=4318 + DBG-DELTAS-SEARCH: uncompressed-delta-size=178121 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=109064 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4930 DBG-DELTAS-SEARCH: type=snapshot-3 DBG-DELTAS-SEARCH: size=39228 @@ -189,7 +210,7 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=82661 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=49132 (BAD) - DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=6 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) $ cat << EOF >>.hg/hgrc > [storage] @@ -198,10 +219,24 @@ > EOF $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 --quiet - DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=6 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 --source full DBG-DELTAS-SEARCH: SEARCH rev=4971 - DBG-DELTAS-SEARCH: ROUND #1 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #1 - 3 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4329 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=13781 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=173765 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=106516 (BAD) + DBG-DELTAS-SEARCH: CANDIDATE: rev=4335 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=14385 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: 
uncompressed-delta-size=171587 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=105187 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4962 DBG-DELTAS-SEARCH: type=snapshot-4 DBG-DELTAS-SEARCH: size=18296 @@ -209,7 +244,14 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=30377 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=16872 (BAD) - DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #2 - 2 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4325 + DBG-DELTAS-SEARCH: type=snapshot-3 + DBG-DELTAS-SEARCH: size=17384 + DBG-DELTAS-SEARCH: base=4318 + DBG-DELTAS-SEARCH: uncompressed-delta-size=178121 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=109064 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4930 DBG-DELTAS-SEARCH: type=snapshot-3 DBG-DELTAS-SEARCH: size=39228 @@ -226,7 +268,7 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=82661 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=49132 (BAD) - DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=6 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 --source storage DBG-DELTAS-SEARCH: SEARCH rev=4971 DBG-DELTAS-SEARCH: ROUND #1 - 1 candidates - cached-delta @@ -237,10 +279,24 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=33050 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=19179 (GOOD) - DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=1 - search-rounds=1 try-count=1 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=1 - search-rounds=1 try-count=1 - delta-type=delta snap-depth=-1 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 --source p1 DBG-DELTAS-SEARCH: SEARCH rev=4971 - DBG-DELTAS-SEARCH: ROUND #1 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #1 - 3 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4329 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=13781 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=173765 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=106516 (BAD) + DBG-DELTAS-SEARCH: CANDIDATE: rev=4335 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=14385 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=171587 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=105187 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4962 DBG-DELTAS-SEARCH: type=snapshot-4 DBG-DELTAS-SEARCH: size=18296 @@ -248,7 +304,14 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=30377 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=16872 (BAD) - DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #2 - 2 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4325 + DBG-DELTAS-SEARCH: type=snapshot-3 + DBG-DELTAS-SEARCH: size=17384 + DBG-DELTAS-SEARCH: base=4318 + 
DBG-DELTAS-SEARCH: uncompressed-delta-size=178121 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=109064 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4930 DBG-DELTAS-SEARCH: type=snapshot-3 DBG-DELTAS-SEARCH: size=39228 @@ -265,10 +328,24 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=82661 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=49132 (BAD) - DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=6 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 --source p2 DBG-DELTAS-SEARCH: SEARCH rev=4971 - DBG-DELTAS-SEARCH: ROUND #1 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #1 - 3 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4329 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=13781 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=173765 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=106516 (BAD) + DBG-DELTAS-SEARCH: CANDIDATE: rev=4335 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=14385 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=171587 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=105187 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4962 DBG-DELTAS-SEARCH: type=snapshot-4 DBG-DELTAS-SEARCH: size=18296 @@ -276,7 +353,14 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=30377 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=16872 (BAD) - DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #2 - 2 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4325 + DBG-DELTAS-SEARCH: type=snapshot-3 + DBG-DELTAS-SEARCH: size=17384 + DBG-DELTAS-SEARCH: base=4318 + DBG-DELTAS-SEARCH: uncompressed-delta-size=178121 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=109064 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4930 DBG-DELTAS-SEARCH: type=snapshot-3 DBG-DELTAS-SEARCH: size=39228 @@ -293,10 +377,24 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=82661 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=49132 (BAD) - DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=6 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) $ hg debug-delta-find SPARSE-REVLOG-TEST-FILE 4971 --source prev DBG-DELTAS-SEARCH: SEARCH rev=4971 - DBG-DELTAS-SEARCH: ROUND #1 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #1 - 3 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4329 + DBG-DELTAS-SEARCH: type=snapshot-4 + DBG-DELTAS-SEARCH: size=13781 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=173765 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=106516 (BAD) + DBG-DELTAS-SEARCH: CANDIDATE: rev=4335 + DBG-DELTAS-SEARCH: 
type=snapshot-4 + DBG-DELTAS-SEARCH: size=14385 + DBG-DELTAS-SEARCH: base=4325 + DBG-DELTAS-SEARCH: uncompressed-delta-size=171587 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=105187 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4962 DBG-DELTAS-SEARCH: type=snapshot-4 DBG-DELTAS-SEARCH: size=18296 @@ -304,7 +402,14 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=30377 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=16872 (BAD) - DBG-DELTAS-SEARCH: ROUND #2 - 1 candidates - search-down + DBG-DELTAS-SEARCH: ROUND #2 - 2 candidates - search-down + DBG-DELTAS-SEARCH: CANDIDATE: rev=4325 + DBG-DELTAS-SEARCH: type=snapshot-3 + DBG-DELTAS-SEARCH: size=17384 + DBG-DELTAS-SEARCH: base=4318 + DBG-DELTAS-SEARCH: uncompressed-delta-size=178121 + DBG-DELTAS-SEARCH: delta-search-time=* (glob) + DBG-DELTAS-SEARCH: DELTA: length=109064 (BAD) DBG-DELTAS-SEARCH: CANDIDATE: rev=4930 DBG-DELTAS-SEARCH: type=snapshot-3 DBG-DELTAS-SEARCH: size=39228 @@ -321,6 +426,6 @@ DBG-DELTAS-SEARCH: uncompressed-delta-size=82661 DBG-DELTAS-SEARCH: delta-search-time=* (glob) DBG-DELTAS-SEARCH: DELTA: length=49132 (BAD) - DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=3 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) + DBG-DELTAS: FILELOG:SPARSE-REVLOG-TEST-FILE: rev=4971: delta-base=4930 is-cached=0 - search-rounds=3 try-count=6 - delta-type=snapshot snap-depth=4 - p1-chain-length=15 p2-chain-length=-1 - duration=* (glob) $ cd ..
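The try-count going from 3 to 6 matches the wider rounds traced above: each round now evaluates every snapshot candidate at its level (3, then 2, then 1 here) instead of a single one, while still settling on the same delta base 4930. The --source storage run is the exception, now reporting its cached delta as delta-type=delta with snap-depth=-1 rather than as a snapshot.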
--- a/tests/test-ssh-bundle1.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-ssh-bundle1.t Mon Feb 12 16:22:47 2024 +0100 @@ -62,10 +62,12 @@ $ hg clone --stream ssh://user@dummy/remote local-stream streaming all changes - 4 files to transfer, 602 bytes of data (no-zstd !) + 5 files to transfer, 602 bytes of data (no-zstd !) transferred 602 bytes in * seconds (*) (glob) (no-zstd !) - 4 files to transfer, 621 bytes of data (zstd !) - transferred 621 bytes in * seconds (* */sec) (glob) (zstd !) + 5 files to transfer, 621 bytes of data (zstd no-rust !) + transferred 621 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 7 files to transfer, 747 bytes of data (zstd rust !) + transferred 747 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found updating to branch default @@ -81,10 +83,12 @@ $ hg -R local-stream book mybook $ hg clone --stream ssh://user@dummy/local-stream stream2 streaming all changes - 4 files to transfer, 602 bytes of data (no-zstd !) + 5 files to transfer, 602 bytes of data (no-zstd !) transferred 602 bytes in * seconds (*) (glob) (no-zstd !) - 4 files to transfer, 621 bytes of data (zstd !) - transferred 621 bytes in * seconds (* */sec) (glob) (zstd !) + 5 files to transfer, 621 bytes of data (zstd no-rust !) + transferred 621 bytes in * seconds (* */sec) (glob) (zstd no-rust !) + 7 files to transfer, 747 bytes of data (zstd rust !) + transferred 747 bytes in * seconds (*/sec) (glob) (zstd rust !) searching for changes no changes found updating to branch default
--- a/tests/test-ssh.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-ssh.t Mon Feb 12 16:22:47 2024 +0100 @@ -54,9 +54,10 @@ $ hg clone --stream ssh://user@dummy/remote local-stream streaming all changes - 8 files to transfer, 827 bytes of data (no-zstd !) + 9 files to transfer, 827 bytes of data (no-zstd !) transferred 827 bytes in * seconds (*) (glob) (no-zstd !) - 8 files to transfer, 846 bytes of data (zstd !) + 9 files to transfer, 846 bytes of data (zstd no-rust !) + 11 files to transfer, 972 bytes of data (zstd rust !) transferred * bytes in * seconds (* */sec) (glob) (zstd !) updating to branch default 2 files updated, 0 files merged, 0 files removed, 0 files unresolved @@ -71,7 +72,8 @@ $ hg -R local-stream book mybook $ hg clone --stream ssh://user@dummy/local-stream stream2 streaming all changes - 15 files to transfer, * of data (glob) + 16 files to transfer, * of data (glob) (no-rust !) + 18 files to transfer, * of data (glob) (rust !) transferred * in * seconds (*) (glob) updating to branch default 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
--- a/tests/test-static-http.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-static-http.t Mon Feb 12 16:22:47 2024 +0100 @@ -225,6 +225,7 @@ /.hg/cache/hgtagsfnodes1 /.hg/dirstate /.hg/requires + /.hg/store/00changelog.d /.hg/store/00changelog.i /.hg/store/00manifest.i /.hg/store/data/%7E2ehgsub.i (no-py37 !) @@ -252,6 +253,7 @@ /remote-with-names/.hg/dirstate /remote-with-names/.hg/localtags /remote-with-names/.hg/requires + /remote-with-names/.hg/store/00changelog.d /remote-with-names/.hg/store/00changelog.i /remote-with-names/.hg/store/00manifest.i /remote-with-names/.hg/store/data/%7E2ehgtags.i (no-py37 !) @@ -270,6 +272,7 @@ /remote/.hg/dirstate /remote/.hg/localtags /remote/.hg/requires + /remote/.hg/store/00changelog.d /remote/.hg/store/00changelog.i /remote/.hg/store/00manifest.i /remote/.hg/store/data/%7E2edotfile%20with%20spaces.i (no-py37 !) @@ -292,6 +295,7 @@ /sub/.hg/cache/hgtagsfnodes1 /sub/.hg/dirstate /sub/.hg/requires + /sub/.hg/store/00changelog.d /sub/.hg/store/00changelog.i /sub/.hg/store/00manifest.i /sub/.hg/store/data/%7E2ehgtags.i (no-py37 !)
--- a/tests/test-status-rev.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-status-rev.t Mon Feb 12 16:22:47 2024 +0100 @@ -88,6 +88,33 @@ Status between first and second commit. Should ignore dirstate status. + $ hg status -marc --rev 0 --rev 1 --config rhg.on-unsupported=abort + M content1_content2_content1-tracked + M content1_content2_content1-untracked + M content1_content2_content2-tracked + M content1_content2_content2-untracked + M content1_content2_content3-tracked + M content1_content2_content3-untracked + M content1_content2_missing-tracked + M content1_content2_missing-untracked + A missing_content2_content2-tracked + A missing_content2_content2-untracked + A missing_content2_content3-tracked + A missing_content2_content3-untracked + A missing_content2_missing-tracked + A missing_content2_missing-untracked + R content1_missing_content1-tracked + R content1_missing_content1-untracked + R content1_missing_content3-tracked + R content1_missing_content3-untracked + R content1_missing_missing-tracked + R content1_missing_missing-untracked + C content1_content1_content1-tracked + C content1_content1_content1-untracked + C content1_content1_content3-tracked + C content1_content1_content3-untracked + C content1_content1_missing-tracked + C content1_content1_missing-untracked $ hg status -A --rev 0:1 'glob:content1_content2_*' M content1_content2_content1-tracked M content1_content2_content1-untracked
--- a/tests/test-storage.py Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-storage.py Mon Feb 12 16:22:47 2024 +0100 @@ -15,6 +15,17 @@ from mercurial.testing import storage as storagetesting try: + from mercurial import rustext + + rustext.__name__ + # Does not pass with pure Rust index + import sys + + sys.exit(80) +except ImportError: + pass + +try: from hgext import sqlitestore except ImportError: sqlitestore = None
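Exit status 80 is what Mercurial's run-tests.py reports as a skipped test, so the new guard skips this whole module whenever the Rust extension is importable, the diff noting that these storage tests do not pass against the pure Rust index. The guard, condensed:

    import sys

    try:
        from mercurial import rustext

        rustext.__name__  # force the demandimport proxy to really import
        sys.exit(80)      # run-tests.py interprets 80 as "skipped"
    except ImportError:
        pass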
--- a/tests/test-stream-bundle-v2.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-stream-bundle-v2.t Mon Feb 12 16:22:47 2024 +0100 @@ -60,9 +60,9 @@ $ hg bundle -a --type="none-v2;stream=$stream_version" bundle.hg $ hg debugbundle bundle.hg Stream params: {} - stream2 -- {bytecount: 1693, filecount: 11, requirements: generaldelta%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v2 no-zstd !) - stream2 -- {bytecount: 1693, filecount: 11, requirements: generaldelta%2Crevlog-compression-zstd%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v2 zstd no-rust !) - stream2 -- {bytecount: 1693, filecount: 11, requirements: generaldelta%2Crevlog-compression-zstd%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v2 rust !) + stream2 -- {bytecount: 1693, filecount: 12, requirements: generaldelta%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v2 no-zstd !) + stream2 -- {bytecount: 1693, filecount: 12, requirements: generaldelta%2Crevlog-compression-zstd%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v2 zstd no-rust !) + stream2 -- {bytecount: 1819, filecount: 14, requirements: generaldelta%2Crevlog-compression-zstd%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v2 rust !) stream3-exp -- {requirements: generaldelta%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v3 no-zstd !) stream3-exp -- {requirements: generaldelta%2Crevlog-compression-zstd%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v3 zstd no-rust !) stream3-exp -- {requirements: generaldelta%2Crevlog-compression-zstd%2Crevlogv1%2Csparserevlog} (mandatory: True) (stream-v3 rust !) @@ -97,7 +97,8 @@ bundle2-input-bundle: with-transaction bundle2-input-part: "stream2" (params: 3 mandatory) supported applying stream bundle - 11 files to transfer, 1.65 KB of data + 12 files to transfer, 1.65 KB of data (no-rust !) + 14 files to transfer, 1.78 KB of data (rust !) starting 4 threads for background file closing (?) starting 4 threads for background file closing (?) adding [s] data/A.i (66 bytes) @@ -107,12 +108,17 @@ adding [s] data/E.i (66 bytes) adding [s] phaseroots (43 bytes) adding [s] 00manifest.i (584 bytes) - adding [s] 00changelog.i (595 bytes) + adding [s] 00changelog.n (62 bytes) (rust !) + adding [s] 00changelog-b875dfc5.nd (64 bytes) (rust !) + adding [s] 00changelog.d (275 bytes) + adding [s] 00changelog.i (320 bytes) adding [c] branch2-served (94 bytes) adding [c] rbc-names-v1 (7 bytes) adding [c] rbc-revs-v1 (40 bytes) - transferred 1.65 KB in * seconds (* */sec) (glob) - bundle2-input-part: total payload size 1840 + transferred 1.65 KB in * seconds (* */sec) (glob) (no-rust !) + bundle2-input-part: total payload size 1857 (no-rust !) + transferred 1.78 KB in * seconds (* */sec) (glob) (rust !) + bundle2-input-part: total payload size 2025 (rust !) bundle2-input-bundle: 1 parts total updating the branch cache finished applying clone bundle @@ -154,7 +160,8 @@ bundle2-input-bundle: with-transaction bundle2-input-part: "stream2" (params: 3 mandatory) supported applying stream bundle - 11 files to transfer, 1.65 KB of data + 12 files to transfer, 1.65 KB of data (no-rust !) + 14 files to transfer, 1.78 KB of data (rust !) starting 4 threads for background file closing (?) starting 4 threads for background file closing (?) adding [s] data/A.i (66 bytes) @@ -164,12 +171,17 @@ adding [s] data/E.i (66 bytes) adding [s] phaseroots (43 bytes) adding [s] 00manifest.i (584 bytes) - adding [s] 00changelog.i (595 bytes) + adding [s] 00changelog.n (62 bytes) (rust !) 
+ adding [s] 00changelog-b875dfc5.nd (64 bytes) (rust !) + adding [s] 00changelog.d (275 bytes) + adding [s] 00changelog.i (320 bytes) adding [c] branch2-served (94 bytes) adding [c] rbc-names-v1 (7 bytes) adding [c] rbc-revs-v1 (40 bytes) - transferred 1.65 KB in * seconds (* */sec) (glob) - bundle2-input-part: total payload size 1840 + transferred 1.65 KB in * seconds (* */sec) (glob) (no-rust !) + bundle2-input-part: total payload size 1857 (no-rust !) + transferred 1.78 KB in * seconds (* */sec) (glob) (rust !) + bundle2-input-part: total payload size 2025 (rust !) bundle2-input-bundle: 1 parts total updating the branch cache finished applying clone bundle @@ -224,12 +236,17 @@ adding [s] data/E.i (66 bytes) adding [s] phaseroots (43 bytes) adding [s] 00manifest.i (584 bytes) - adding [s] 00changelog.i (595 bytes) + adding [s] 00changelog.n (62 bytes) (rust !) + adding [s] 00changelog-b875dfc5.nd (64 bytes) (rust !) + adding [s] 00changelog.d (275 bytes) + adding [s] 00changelog.i (320 bytes) adding [c] branch2-served (94 bytes) adding [c] rbc-names-v1 (7 bytes) adding [c] rbc-revs-v1 (40 bytes) - transferred 1.65 KB in * seconds (* */sec) (glob) - bundle2-input-part: total payload size 1852 + transferred 1.65 KB in * seconds (* */sec) (glob) (no-rust !) + bundle2-input-part: total payload size 1869 (no-rust !) + transferred 1.78 KB in * seconds (* */sec) (glob) (rust !) + bundle2-input-part: total payload size 2037 (rust !) bundle2-input-bundle: 1 parts total updating the branch cache finished applying clone bundle @@ -281,12 +298,17 @@ adding [s] data/E.i (66 bytes) adding [s] phaseroots (43 bytes) adding [s] 00manifest.i (584 bytes) - adding [s] 00changelog.i (595 bytes) + adding [s] 00changelog.n (62 bytes) (rust !) + adding [s] 00changelog-b875dfc5.nd (64 bytes) (rust !) + adding [s] 00changelog.d (275 bytes) + adding [s] 00changelog.i (320 bytes) adding [c] branch2-served (94 bytes) adding [c] rbc-names-v1 (7 bytes) adding [c] rbc-revs-v1 (40 bytes) - transferred 1.65 KB in * seconds (* */sec) (glob) - bundle2-input-part: total payload size 1852 + transferred 1.65 KB in * seconds (* */sec) (glob) (no-rust !) + bundle2-input-part: total payload size 1869 (no-rust !) + transferred 1.78 KB in * seconds (* */sec) (glob) (rust !) + bundle2-input-part: total payload size 2037 (rust !) bundle2-input-bundle: 1 parts total updating the branch cache finished applying clone bundle
--- a/tests/test-subrepo-deep-nested-change.t Mon Feb 12 16:17:08 2024 +0100 +++ b/tests/test-subrepo-deep-nested-change.t Mon Feb 12 16:22:47 2024 +0100 @@ -28,12 +28,22 @@ $ echo "sub2 = ../sub2" > sub1/.hgsub $ hg clone sub2 sub1/sub2 \r (no-eol) (esc) - linking [======> ] 1/6\r (no-eol) (esc) - linking [==============> ] 2/6\r (no-eol) (esc) - linking [=====================> ] 3/6\r (no-eol) (esc) - linking [=============================> ] 4/6\r (no-eol) (esc) - linking [====================================> ] 5/6\r (no-eol) (esc) - linking [============================================>] 6/6\r (no-eol) (esc) + linking [=====> ] 1/7\r (no-eol) (esc) (no-rust !) + linking [===========> ] 2/7\r (no-eol) (esc) (no-rust !) + linking [==================> ] 3/7\r (no-eol) (esc) (no-rust !) + linking [========================> ] 4/7\r (no-eol) (esc) (no-rust !) + linking [===============================> ] 5/7\r (no-eol) (esc) (no-rust !) + linking [=====================================> ] 6/7\r (no-eol) (esc) (no-rust !) + linking [============================================>] 7/7\r (no-eol) (esc) (no-rust !) + linking [====> ] 1/9\r (no-eol) (esc) (rust !) + linking [=========> ] 2/9\r (no-eol) (esc) (rust !) + linking [==============> ] 3/9\r (no-eol) (esc) (rust !) + linking [===================> ] 4/9\r (no-eol) (esc) (rust !) + linking [========================> ] 5/9\r (no-eol) (esc) (rust !) + linking [=============================> ] 6/9\r (no-eol) (esc) (rust !) + linking [==================================> ] 7/9\r (no-eol) (esc) (rust !) + linking [=======================================> ] 8/9\r (no-eol) (esc) (rust !) + linking [============================================>] 9/9\r (no-eol) (esc) (rust !) \r (no-eol) (esc) \r (no-eol) (esc) updating [===========================================>] 1/1\r (no-eol) (esc) @@ -50,32 +60,70 @@ $ hg init main $ echo main > main/main $ echo "sub1 = ../sub1" > main/.hgsub + +#if rust $ hg clone sub1 main/sub1 \r (no-eol) (esc) - linking [====> ] 1/8\r (no-eol) (esc) - linking [==========> ] 2/8\r (no-eol) (esc) - linking [===============> ] 3/8\r (no-eol) (esc) - linking [=====================> ] 4/8\r (no-eol) (esc) - linking [===========================> ] 5/8\r (no-eol) (esc) - linking [================================> ] 6/8\r (no-eol) (esc) - linking [======================================> ] 7/8\r (no-eol) (esc) - linking [============================================>] 8/8\r (no-eol) (esc) + linking [==> ] 1/11\r (no-eol) (esc) + linking [======> ] 2/11\r (no-eol) (esc) + linking [==========> ] 3/11\r (no-eol) (esc) + linking [==============> ] 4/11\r (no-eol) (esc) + linking [==================> ] 5/11\r (no-eol) (esc) + linking [======================> ] 6/11\r (no-eol) (esc) + linking [==========================> ] 7/11\r (no-eol) (esc) + linking [==============================> ] 8/11\r (no-eol) (esc) + linking [==================================> ] 9/11\r (no-eol) (esc) + linking [======================================> ] 10/11\r (no-eol) (esc) + linking [==========================================>] 11/11\r (no-eol) (esc) \r (no-eol) (esc) \r (no-eol) (esc) updating [===========================================>] 3/3\r (no-eol) (esc) \r (no-eol) (esc) \r (no-eol) (esc) - linking [======> ] 1/6\r (no-eol) (esc) - linking [==============> ] 2/6\r (no-eol) (esc) - linking [=====================> ] 3/6\r (no-eol) (esc) - linking [=============================> ] 4/6\r (no-eol) (esc) - linking [====================================> ] 
5/6\r (no-eol) (esc)
-  linking [============================================>] 6/6\r (no-eol) (esc)
+  linking [====>                                       ] 1/9\r (no-eol) (esc)
+  linking [=========>                                  ] 2/9\r (no-eol) (esc)
+  linking [==============>                             ] 3/9\r (no-eol) (esc)
+  linking [===================>                        ] 4/9\r (no-eol) (esc)
+  linking [========================>                   ] 5/9\r (no-eol) (esc)
+  linking [=============================>              ] 6/9\r (no-eol) (esc)
+  linking [==================================>         ] 7/9\r (no-eol) (esc)
+  linking [=======================================>    ] 8/9\r (no-eol) (esc)
+  linking [============================================>] 9/9\r (no-eol) (esc)
   updating [===========================================>] 1/1\r (no-eol) (esc)
   \r (no-eol) (esc)
   updating to branch default
   cloning subrepo sub2 from $TESTTMP/sub2
   3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+#else
+  $ hg clone sub1 main/sub1
+  \r (no-eol) (esc)
+  linking [====>                                       ] 1/9\r (no-eol) (esc)
+  linking [=========>                                  ] 2/9\r (no-eol) (esc)
+  linking [==============>                             ] 3/9\r (no-eol) (esc)
+  linking [===================>                        ] 4/9\r (no-eol) (esc)
+  linking [========================>                   ] 5/9\r (no-eol) (esc)
+  linking [=============================>              ] 6/9\r (no-eol) (esc)
+  linking [==================================>         ] 7/9\r (no-eol) (esc)
+  linking [=======================================>    ] 8/9\r (no-eol) (esc)
+  linking [============================================>] 9/9\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  updating [===========================================>] 3/3\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  linking [=====>                                      ] 1/7\r (no-eol) (esc)
+  linking [===========>                                ] 2/7\r (no-eol) (esc)
+  linking [==================>                         ] 3/7\r (no-eol) (esc)
+  linking [========================>                   ] 4/7\r (no-eol) (esc)
+  linking [===============================>            ] 5/7\r (no-eol) (esc)
+  linking [=====================================>      ] 6/7\r (no-eol) (esc)
+  linking [============================================>] 7/7\r (no-eol) (esc)
+  updating [===========================================>] 1/1\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  updating to branch default
+  cloning subrepo sub2 from $TESTTMP/sub2
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+#endif
 
   $ hg add -R main
   adding main/.hgsub
   adding main/main
@@ -152,44 +200,106 @@
 Clone main
 
+#if rust
   $ hg --config extensions.largefiles= clone main cloned
   \r (no-eol) (esc)
-  linking [====>                                       ] 1/8\r (no-eol) (esc)
-  linking [==========>                                 ] 2/8\r (no-eol) (esc)
-  linking [===============>                            ] 3/8\r (no-eol) (esc)
-  linking [=====================>                      ] 4/8\r (no-eol) (esc)
-  linking [===========================>                ] 5/8\r (no-eol) (esc)
-  linking [================================>           ] 6/8\r (no-eol) (esc)
-  linking [======================================>     ] 7/8\r (no-eol) (esc)
-  linking [============================================>] 8/8\r (no-eol) (esc)
+  linking [====>                                       ] 1/9\r (no-eol) (esc) (no-rust !)
+  linking [=========>                                  ] 2/9\r (no-eol) (esc) (no-rust !)
+  linking [==============>                             ] 3/9\r (no-eol) (esc) (no-rust !)
+  linking [===================>                        ] 4/9\r (no-eol) (esc) (no-rust !)
+  linking [========================>                   ] 5/9\r (no-eol) (esc) (no-rust !)
+  linking [=============================>              ] 6/9\r (no-eol) (esc) (no-rust !)
+  linking [==================================>         ] 7/9\r (no-eol) (esc) (no-rust !)
+  linking [=======================================>    ] 8/9\r (no-eol) (esc) (no-rust !)
+  linking [============================================>] 9/9\r (no-eol) (esc) (no-rust !)
+  linking [==>                                         ] 1/11\r (no-eol) (esc) (rust !)
+  linking [======>                                     ] 2/11\r (no-eol) (esc) (rust !)
+  linking [==========>                                 ] 3/11\r (no-eol) (esc) (rust !)
+  linking [==============>                             ] 4/11\r (no-eol) (esc) (rust !)
+  linking [==================>                         ] 5/11\r (no-eol) (esc) (rust !)
+  linking [======================>                     ] 6/11\r (no-eol) (esc) (rust !)
+  linking [==========================>                 ] 7/11\r (no-eol) (esc) (rust !)
+  linking [==============================>             ] 8/11\r (no-eol) (esc) (rust !)
+  linking [==================================>         ] 9/11\r (no-eol) (esc) (rust !)
+  linking [======================================>     ] 10/11\r (no-eol) (esc) (rust !)
+  linking [==========================================>] 11/11\r (no-eol) (esc) (rust !)
   \r (no-eol) (esc)
   \r (no-eol) (esc)
   updating [===========================================>] 3/3\r (no-eol) (esc)
   \r (no-eol) (esc)
   \r (no-eol) (esc)
-  linking [====>                                       ] 1/8\r (no-eol) (esc)
-  linking [==========>                                 ] 2/8\r (no-eol) (esc)
-  linking [===============>                            ] 3/8\r (no-eol) (esc)
-  linking [=====================>                      ] 4/8\r (no-eol) (esc)
-  linking [===========================>                ] 5/8\r (no-eol) (esc)
-  linking [================================>           ] 6/8\r (no-eol) (esc)
-  linking [======================================>     ] 7/8\r (no-eol) (esc)
-  linking [============================================>] 8/8\r (no-eol) (esc)
+  linking [==>                                         ] 1/11\r (no-eol) (esc)
+  linking [======>                                     ] 2/11\r (no-eol) (esc)
+  linking [==========>                                 ] 3/11\r (no-eol) (esc)
+  linking [==============>                             ] 4/11\r (no-eol) (esc)
+  linking [==================>                         ] 5/11\r (no-eol) (esc)
+  linking [======================>                     ] 6/11\r (no-eol) (esc)
+  linking [==========================>                 ] 7/11\r (no-eol) (esc)
+  linking [==============================>             ] 8/11\r (no-eol) (esc)
+  linking [==================================>         ] 9/11\r (no-eol) (esc)
+  linking [======================================>     ] 10/11\r (no-eol) (esc)
+  linking [==========================================>] 11/11\r (no-eol) (esc)
   updating [===========================================>] 3/3\r (no-eol) (esc)
   \r (no-eol) (esc)
   \r (no-eol) (esc)
-  linking [======>                                     ] 1/6\r (no-eol) (esc)
-  linking [==============>                             ] 2/6\r (no-eol) (esc)
-  linking [=====================>                      ] 3/6\r (no-eol) (esc)
-  linking [=============================>              ] 4/6\r (no-eol) (esc)
-  linking [====================================>       ] 5/6\r (no-eol) (esc)
-  linking [============================================>] 6/6\r (no-eol) (esc)
+  linking [====>                                       ] 1/9\r (no-eol) (esc)
+  linking [=========>                                  ] 2/9\r (no-eol) (esc)
+  linking [==============>                             ] 3/9\r (no-eol) (esc)
+  linking [===================>                        ] 4/9\r (no-eol) (esc)
+  linking [========================>                   ] 5/9\r (no-eol) (esc)
+  linking [=============================>              ] 6/9\r (no-eol) (esc)
+  linking [==================================>         ] 7/9\r (no-eol) (esc)
+  linking [=======================================>    ] 8/9\r (no-eol) (esc)
+  linking [============================================>] 9/9\r (no-eol) (esc)
   updating [===========================================>] 1/1\r (no-eol) (esc)
   \r (no-eol) (esc)
   updating to branch default
   cloning subrepo sub1 from $TESTTMP/sub1
   cloning subrepo sub1/sub2 from $TESTTMP/sub2
   3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+#else
+  $ hg --config extensions.largefiles= clone main cloned
+  \r (no-eol) (esc)
+  linking [====>                                       ] 1/9\r (no-eol) (esc)
+  linking [=========>                                  ] 2/9\r (no-eol) (esc)
+  linking [==============>                             ] 3/9\r (no-eol) (esc)
+  linking [===================>                        ] 4/9\r (no-eol) (esc)
+  linking [========================>                   ] 5/9\r (no-eol) (esc)
+  linking [=============================>              ] 6/9\r (no-eol) (esc)
+  linking [==================================>         ] 7/9\r (no-eol) (esc)
+  linking [=======================================>    ] 8/9\r (no-eol) (esc)
+  linking [============================================>] 9/9\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  updating [===========================================>] 3/3\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  linking [====>                                       ] 1/9\r (no-eol) (esc)
+  linking [=========>                                  ] 2/9\r (no-eol) (esc)
+  linking [==============>                             ] 3/9\r (no-eol) (esc)
+  linking [===================>                        ] 4/9\r (no-eol) (esc)
+  linking [========================>                   ] 5/9\r (no-eol) (esc)
+  linking [=============================>              ] 6/9\r (no-eol) (esc)
+  linking [==================================>         ] 7/9\r (no-eol) (esc)
+  linking [=======================================>    ] 8/9\r (no-eol) (esc)
+  linking [============================================>] 9/9\r (no-eol) (esc)
+  updating [===========================================>] 3/3\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  linking [=====>                                      ] 1/7\r (no-eol) (esc)
+  linking [===========>                                ] 2/7\r (no-eol) (esc)
+  linking [==================>                         ] 3/7\r (no-eol) (esc)
+  linking [========================>                   ] 4/7\r (no-eol) (esc)
+  linking [===============================>            ] 5/7\r (no-eol) (esc)
+  linking [=====================================>      ] 6/7\r (no-eol) (esc)
+  linking [============================================>] 7/7\r (no-eol) (esc)
+  updating [===========================================>] 1/1\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  updating to branch default
+  cloning subrepo sub1 from $TESTTMP/sub1
+  cloning subrepo sub1/sub2 from $TESTTMP/sub2
+  3 files updated, 0 files merged, 0 files removed, 0 files unresolved
+#endif
 
 Largefiles is NOT enabled in the clone if the source repo doesn't require it
 
   $ hg debugrequires -R cloned | grep largefiles
--- a/tests/test-subrepo-recursion.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-subrepo-recursion.t	Mon Feb 12 16:22:47 2024 +0100
@@ -454,6 +454,65 @@
 #if hardlink
   $ hg clone -U . ../empty
   \r (no-eol) (esc)
+  linking [===>                                      ] 1/10\r (no-eol) (esc) (no-rust !)
+  linking [=======>                                  ] 2/10\r (no-eol) (esc) (no-rust !)
+  linking [===========>                              ] 3/10\r (no-eol) (esc) (no-rust !)
+  linking [================>                         ] 4/10\r (no-eol) (esc) (no-rust !)
+  linking [====================>                     ] 5/10\r (no-eol) (esc) (no-rust !)
+  linking [========================>                 ] 6/10\r (no-eol) (esc) (no-rust !)
+  linking [=============================>            ] 7/10\r (no-eol) (esc) (no-rust !)
+  linking [=================================>        ] 8/10\r (no-eol) (esc) (no-rust !)
+  linking [=====================================>    ] 9/10\r (no-eol) (esc) (no-rust !)
+  linking [==========================================>] 10/10\r (no-eol) (esc) (no-rust !)
+  linking [==>                                       ] 1/12\r (no-eol) (esc) (rust !)
+  linking [======>                                   ] 2/12\r (no-eol) (esc) (rust !)
+  linking [=========>                                ] 3/12\r (no-eol) (esc) (rust !)
+  linking [=============>                            ] 4/12\r (no-eol) (esc) (rust !)
+  linking [================>                         ] 5/12\r (no-eol) (esc) (rust !)
+  linking [====================>                     ] 6/12\r (no-eol) (esc) (rust !)
+  linking [========================>                 ] 7/12\r (no-eol) (esc) (rust !)
+  linking [===========================>              ] 8/12\r (no-eol) (esc) (rust !)
+  linking [===============================>          ] 9/12\r (no-eol) (esc) (rust !)
+  linking [==================================>       ] 10/12\r (no-eol) (esc) (rust !)
+  linking [======================================>   ] 11/12\r (no-eol) (esc) (rust !)
+  linking [==========================================>] 12/12\r (no-eol) (esc) (rust !)
+  \r (no-eol) (esc)
+#else
+  $ hg clone -U . ../empty
+  \r (no-eol) (esc)
+  linking [ <=> ] 1 (no-eol)
+#endif
+
+  $ cd ../empty
+#if hardlink
+#if rust
+  $ hg archive --subrepos -r tip --prefix './' ../archive.tar.gz
+  \r (no-eol) (esc)
+  archiving [                                           ] 0/3\r (no-eol) (esc)
+  archiving [=============>                             ] 1/3\r (no-eol) (esc)
+  archiving [===========================>               ] 2/3\r (no-eol) (esc)
+  archiving [==========================================>] 3/3\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  linking [==>                                         ] 1/11\r (no-eol) (esc)
+  linking [======>                                     ] 2/11\r (no-eol) (esc)
+  linking [==========>                                 ] 3/11\r (no-eol) (esc)
+  linking [==============>                             ] 4/11\r (no-eol) (esc)
+  linking [==================>                         ] 5/11\r (no-eol) (esc)
+  linking [======================>                     ] 6/11\r (no-eol) (esc)
+  linking [==========================>                 ] 7/11\r (no-eol) (esc)
+  linking [==============================>             ] 8/11\r (no-eol) (esc)
+  linking [==================================>         ] 9/11\r (no-eol) (esc)
+  linking [======================================>     ] 10/11\r (no-eol) (esc)
+  linking [==========================================>] 11/11\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  archiving (foo) [                                     ] 0/3\r (no-eol) (esc)
+  archiving (foo) [===========>                         ] 1/3\r (no-eol) (esc)
+  archiving (foo) [=======================>             ] 2/3\r (no-eol) (esc)
+  archiving (foo) [====================================>] 3/3\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  \r (no-eol) (esc)
   linking [====>                                       ] 1/9\r (no-eol) (esc)
   linking [=========>                                  ] 2/9\r (no-eol) (esc)
   linking [==============>                             ] 3/9\r (no-eol) (esc)
@@ -464,14 +523,13 @@
   linking [=======================================>    ] 8/9\r (no-eol) (esc)
   linking [============================================>] 9/9\r (no-eol) (esc)
   \r (no-eol) (esc)
+  \r (no-eol) (esc)
+  archiving (foo/bar) [                                ] 0/1\r (no-eol) (esc)
+  archiving (foo/bar) [================================>] 1/1\r (no-eol) (esc)
+  \r (no-eol) (esc)
+  cloning subrepo foo from $TESTTMP/repo/foo
+  cloning subrepo foo/bar from $TESTTMP/repo/foo/bar
 #else
-  $ hg clone -U . ../empty
-  \r (no-eol) (esc)
-  linking [ <=> ] 1 (no-eol)
-#endif
-
-  $ cd ../empty
-#if hardlink
   $ hg archive --subrepos -r tip --prefix './' ../archive.tar.gz
   \r (no-eol) (esc)
   archiving [                                           ] 0/3\r (no-eol) (esc)
@@ -480,14 +538,15 @@
   archiving [==========================================>] 3/3\r (no-eol) (esc)
   \r (no-eol) (esc)
   \r (no-eol) (esc)
-  linking [====>                                       ] 1/8\r (no-eol) (esc)
-  linking [==========>                                 ] 2/8\r (no-eol) (esc)
-  linking [===============>                            ] 3/8\r (no-eol) (esc)
-  linking [=====================>                      ] 4/8\r (no-eol) (esc)
-  linking [===========================>                ] 5/8\r (no-eol) (esc)
-  linking [================================>           ] 6/8\r (no-eol) (esc)
-  linking [======================================>     ] 7/8\r (no-eol) (esc)
-  linking [============================================>] 8/8\r (no-eol) (esc)
+  linking [====>                                       ] 1/9\r (no-eol) (esc)
+  linking [=========>                                  ] 2/9\r (no-eol) (esc)
+  linking [==============>                             ] 3/9\r (no-eol) (esc)
+  linking [===================>                        ] 4/9\r (no-eol) (esc)
+  linking [========================>                   ] 5/9\r (no-eol) (esc)
+  linking [=============================>              ] 6/9\r (no-eol) (esc)
+  linking [==================================>         ] 7/9\r (no-eol) (esc)
+  linking [=======================================>    ] 8/9\r (no-eol) (esc)
+  linking [============================================>] 9/9\r (no-eol) (esc)
   \r (no-eol) (esc)
   \r (no-eol) (esc)
   archiving (foo) [                                     ] 0/3\r (no-eol) (esc)
@@ -496,12 +555,13 @@
   archiving (foo) [====================================>] 3/3\r (no-eol) (esc)
   \r (no-eol) (esc)
   \r (no-eol) (esc)
-  linking [======>                                     ] 1/6\r (no-eol) (esc)
-  linking [==============>                             ] 2/6\r (no-eol) (esc)
-  linking [=====================>                      ] 3/6\r (no-eol) (esc)
-  linking [=============================>              ] 4/6\r (no-eol) (esc)
-  linking [====================================>       ] 5/6\r (no-eol) (esc)
-  linking [============================================>] 6/6\r (no-eol) (esc)
+  linking [=====>                                      ] 1/7\r (no-eol) (esc)
+  linking [===========>                                ] 2/7\r (no-eol) (esc)
+  linking [==================>                         ] 3/7\r (no-eol) (esc)
+  linking [========================>                   ] 4/7\r (no-eol) (esc)
+  linking [===============================>            ] 5/7\r (no-eol) (esc)
+  linking [=====================================>      ] 6/7\r (no-eol) (esc)
+  linking [============================================>] 7/7\r (no-eol) (esc)
   \r (no-eol) (esc)
   \r (no-eol) (esc)
   archiving (foo/bar) [                                ] 0/1\r (no-eol) (esc)
@@ -509,6 +569,7 @@
   \r (no-eol) (esc)
   cloning subrepo foo from $TESTTMP/repo/foo
   cloning subrepo foo/bar from $TESTTMP/repo/foo/bar
+#endif
 #else
 
 Note there's a slight output glitch on non-hardlink systems: the last
 "linking" progress topic never gets closed, leading to slight output
 corruption on that platform.
--- a/tests/test-symlinks.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-symlinks.t	Mon Feb 12 16:22:47 2024 +0100
@@ -188,6 +188,35 @@
 
   $ cd ..
 
+== symlinks and add with --include ==
+
+directory moved and symlinked
+
+  $ hg init add-include
+  $ cd add-include
+  $ mkdir foo
+  $ touch foo/a
+  $ hg ci -Ama
+  adding foo/a
+  $ hg mv foo bar
+  moving foo/a to bar/a
+  $ ln -s bar foo
+  $ hg status
+  A bar/a
+  R foo/a
+  ? foo
+
+can add with --include
+
+  $ hg add -I foo
+  adding foo
+  $ hg status
+  A bar/a
+  A foo
+  R foo/a
+
+  $ cd ..
+
 == root of repository is symlinked ==
 
   $ hg init root
--- a/tests/test-template-map.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-template-map.t	Mon Feb 12 16:22:47 2024 +0100
@@ -766,7 +766,26 @@
    ],
    'desc': 'third',
    'diff': 'diff -r 29114dbae42b -r 95c24699272e fourth\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/fourth\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+second\ndiff -r 29114dbae42b -r 95c24699272e second\n--- a/second\tMon Jan 12 13:46:40 1970 +0000\n+++ /dev/null\tThu Jan 01 00:00:00 1970 +0000\n@@ -1,1 +0,0 @@\n-second\ndiff -r 29114dbae42b -r 95c24699272e third\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/third\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+third\n',
-   'diffstat': ' fourth | 1 +\n second | 1 -\n third | 1 +\n 3 files changed, 2 insertions(+), 1 deletions(-)\n',
+   'diffstat': [
+    {
+     'additions': 1,
+     'binary': False,
+     'name': 'fourth',
+     'removals': 0
+    },
+    {
+     'additions': 0,
+     'binary': False,
+     'name': 'second',
+     'removals': 1
+    },
+    {
+     'additions': 1,
+     'binary': False,
+     'name': 'third',
+     'removals': 0
+    }
+   ],
    'files': [
     'fourth',
     'second',
@@ -820,7 +839,7 @@
   "date": [1577872860, 0],
   "desc": "third",
   "diff": "diff -r 29114dbae42b -r 95c24699272e fourth\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/fourth\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+second\ndiff -r 29114dbae42b -r 95c24699272e second\n--- a/second\tMon Jan 12 13:46:40 1970 +0000\n+++ /dev/null\tThu Jan 01 00:00:00 1970 +0000\n@@ -1,1 +0,0 @@\n-second\ndiff -r 29114dbae42b -r 95c24699272e third\n--- /dev/null\tThu Jan 01 00:00:00 1970 +0000\n+++ b/third\tWed Jan 01 10:01:00 2020 +0000\n@@ -0,0 +1,1 @@\n+third\n",
-  "diffstat": " fourth | 1 +\n second | 1 -\n third | 1 +\n 3 files changed, 2 insertions(+), 1 deletions(-)\n",
+  "diffstat": [{"additions": 1, "binary": false, "name": "fourth", "removals": 0}, {"additions": 0, "binary": false, "name": "second", "removals": 1}, {"additions": 1, "binary": false, "name": "third", "removals": 0}],
   "files": ["fourth", "second", "third"],
   "node": "95c24699272ef57d062b8bccc32c878bf841784a",
   "parents": ["29114dbae42b9f078cf2714dbe3a86bba8ec7453"],
@@ -1180,7 +1199,7 @@
 
   $ hg log -r. -T'json(diffstat)'
   [
-  {"diffstat": " fourth | 1 +\n second | 1 -\n third | 1 +\n 3 files changed, 2 insertions(+), 1 deletions(-)\n"}
+  {"diffstat": [{"additions": 1, "binary": false, "name": "fourth", "removals": 0}, {"additions": 0, "binary": false, "name": "second", "removals": 1}, {"additions": 1, "binary": false, "name": "third", "removals": 0}]}
   ]
 
   $ hg log -r. -T'json(manifest)'
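
The hunks above change the JSON form of the diffstat template data from one pre-formatted string into a list of per-file mappings (name, additions, removals, binary), which templates can walk directly. A minimal sketch of iterating it, assuming the new keyword exposes the usual list-of-mappings operators (the command and its output here are illustrative, not taken from this changeset):

  $ hg log -r . -T '{diffstat % "{name}: +{additions}/-{removals}\n"}'
  fourth: +1/-0
  second: +0/-1
  third: +1/-0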
--- a/tests/test-transaction-safety.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-transaction-safety.t	Mon Feb 12 16:22:47 2024 +0100
@@ -89,7 +89,7 @@
   > rm -f $TESTTMP/sync/*
   > rm -f $TESTTMP/output/*
   > hg log --rev 'tip' -T 'pre-commit: {rev} {desc}\n'
-  > echo x >> a
+  > echo x >> of
   > sh $TESTTMP/script/external.sh & hg commit -m "$1"
   > cat $TESTTMP/output/external.out
   > cat $TESTTMP/output/internal.out
@@ -101,7 +101,7 @@
   > rm -f $TESTTMP/sync/*
   > rm -f $TESTTMP/output/*
   > hg log --rev 'tip' -T 'pre-commit: {rev} {desc}\n'
-  > echo x >> a
+  > echo x >> of
   > sh $TESTTMP/script/external.sh & hg pull ../other-repo/ --rev "$1" --force --quiet
   > cat $TESTTMP/output/external.out
   > cat $TESTTMP/output/internal.out
@@ -113,22 +113,22 @@
 
 The source is large to ensure we don't use inline mode after the pull
 
   $ hg init other-repo
-  $ hg -R other-repo debugbuilddag .+500
+  $ hg -R other-repo debugbuilddag .+500 --overwritten-file
 
 prepare an empty repository in which to run the test:
 
   $ hg init repo
   $ cd repo
-  $ touch a
-  $ hg add a
+  $ touch of
+  $ hg add of
 
 prepare a small extension to control inline size
 
   $ mkdir $TESTTMP/ext
   $ cat << EOF > $TESTTMP/ext/small_inline.py
   > from mercurial import revlog
-  > revlog._maxinline = 64 * 100
+  > revlog._maxinline = 3 * 100
   > EOF
@@ -154,8 +154,8 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
-  flags : inline
+  $ hg debugrevlog of | grep inline
+  flags : inline, * (glob)
 
 #endif
@@ -166,8 +166,8 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
-  flags : inline
+  $ hg debugrevlog of | grep inline
+  flags : inline, * (glob)
 
 #endif
@@ -179,8 +179,8 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
-  flags : inline
+  $ hg debugrevlog of | grep inline
+  flags : inline, * (glob)
 
 #endif
@@ -191,8 +191,8 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
-  flags : inline
+  $ hg debugrevlog of | grep inline
+  flags : inline, * (glob)
 
 #endif
@@ -205,8 +205,8 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
-  flags : inline
+  $ hg debugrevlog of | grep inline
+  flags : inline, * (glob)
 
 #endif
@@ -217,8 +217,8 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
-  flags : inline
+  $ hg debugrevlog of | grep inline
+  flags : inline, * (glob)
 
 #endif
@@ -230,7 +230,7 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
+  $ hg debugrevlog of | grep inline
   [1]
 
 #endif
@@ -242,7 +242,7 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
+  $ hg debugrevlog of | grep inline
   [1]
 
 #endif
@@ -255,7 +255,7 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
+  $ hg debugrevlog of | grep inline
   [1]
 
 #endif
@@ -268,7 +268,7 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
+  $ hg debugrevlog of | grep inline
   [1]
 
 #endif
@@ -281,7 +281,7 @@
 
 #if revlogv1
 
-  $ hg debugrevlog -c | grep inline
+  $ hg debugrevlog of | grep inline
   [1]
 
 #endif
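
For context on the hunks above: a revlog starts out "inline", with index and data interleaved in the .i file, and is split into separate .i/.d files once it outgrows revlog._maxinline; the small_inline.py snippet in the test lowers that threshold so the split can be triggered with tiny files. As a sketch, one way to activate such a one-file extension for a single command, assuming the path defined in the test above (the test itself may wire it up through an hgrc instead):

  $ hg --config extensions.small_inline=$TESTTMP/ext/small_inline.py commit -m step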
--- a/tests/test-treemanifest.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-treemanifest.t	Mon Feb 12 16:22:47 2024 +0100
@@ -761,7 +761,8 @@
   $ hg clone --config experimental.changegroup3=True --stream -U \
   >   http://localhost:$HGPORT1 stream-clone-basicstore
   streaming all changes
-  28 files to transfer, * of data (glob)
+  29 files to transfer, * of data (glob) (no-rust !)
+  31 files to transfer, * of data (glob) (rust !)
   transferred * in * seconds (*) (glob)
   $ hg -R stream-clone-basicstore verify -q
   $ cat port-1-errors.log
@@ -770,7 +771,8 @@
   $ hg clone --config experimental.changegroup3=True --stream -U \
   >   http://localhost:$HGPORT2 stream-clone-encodedstore
   streaming all changes
-  28 files to transfer, * of data (glob)
+  29 files to transfer, * of data (glob) (no-rust !)
+  31 files to transfer, * of data (glob) (rust !)
   transferred * in * seconds (*) (glob)
   $ hg -R stream-clone-encodedstore verify -q
   $ cat port-2-errors.log
@@ -779,15 +781,17 @@
   $ hg clone --config experimental.changegroup3=True --stream -U \
   >   http://localhost:$HGPORT stream-clone-fncachestore
   streaming all changes
-  22 files to transfer, * of data (glob)
+  23 files to transfer, * of data (glob) (no-rust !)
+  25 files to transfer, * of data (glob) (rust !)
   transferred * in * seconds (*) (glob)
   $ hg -R stream-clone-fncachestore verify -q
   $ cat port-0-errors.log
 
 Packed bundle
 
   $ hg -R deeprepo debugcreatestreamclonebundle repo-packed.hg
-  writing 5330 bytes for 18 files (no-zstd !)
-  writing 5400 bytes for 18 files (zstd !)
+  writing 5330 bytes for 19 files (no-zstd !)
+  writing 5400 bytes for 19 files (zstd no-rust !)
+  writing 5654 bytes for 21 files (zstd rust !)
   bundle requirements:.* treemanifest(,.*)? (re)
   $ hg debugbundle --spec repo-packed.hg
   none-packed1;requirements%3D(.*%2C)?treemanifest(%2C.*)? (re)
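
The rust expectations above are consistently two files higher than the no-rust ones because rust builds also stream the changelog's persistent nodemap, which lives in two extra store files (00changelog.n plus a 00changelog-*.nd data file, the same pair that appears in the test-upgrade-repo.t hunks below). A sketch of spotting them in a clone, reusing the glob conventions these tests already use:

  $ ls .hg/store | grep 00changelog
  00changelog-????????.nd (glob) (rust !)
  00changelog.d
  00changelog.i
  00changelog.n (rust !)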
--- a/tests/test-upgrade-repo.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-upgrade-repo.t	Mon Feb 12 16:22:47 2024 +0100
@@ -836,7 +836,10 @@
 store directory has files we expect
 
   $ ls .hg/store
+  00changelog-????????.nd (glob) (rust !)
+  00changelog.d
   00changelog.i
+  00changelog.n (rust !)
   00manifest.i
   data
   data-s
@@ -860,7 +863,10 @@
   $ ls -d .hg/upgradebackup.*/
   .hg/upgradebackup.*/ (glob)
   $ ls .hg/upgradebackup.*/store
+  00changelog-????????.nd (glob) (rust !)
+  00changelog.d
   00changelog.i
+  00changelog.n (rust !)
   00manifest.i
   data
   data-s
@@ -868,6 +874,7 @@
   phaseroots
   requires
   undo
+  undo.backup.00changelog.n.bck (rust !)
   undo.backup.fncache.bck
   undo.backupfiles
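
The nodemap files that now show up in the store listing and in upgrade backups can also be inspected with hg's debug command for the persistent nodemap. A sketch of its use; the exact set of metadata fields is illustrative and may vary by version:

  $ hg debugnodemap --metadata
  uid: ???????? (glob)
  tip-rev: * (glob)
  data-length: * (glob)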
--- a/tests/test-verify.t	Mon Feb 12 16:17:08 2024 +0100
+++ b/tests/test-verify.t	Mon Feb 12 16:22:47 2024 +0100
@@ -311,7 +311,8 @@
   $ cat start b > .hg/store/data/a.i
   $ hg verify -q
-  a@1: broken revlog! (index a is corrupted)
+  a@1: broken revlog! (index a is corrupted) (no-rust !)
+  a@1: broken revlog! (abort: unexpected inline revlog length) (rust !)
   warning: orphan data file 'data/a.i'
   not checking dirstate because of previous errors
   1 warnings encountered!
@@ -335,6 +336,9 @@
   checked 1 changesets with 1 changes to 1 files
   $ cd ..
 
+# rust index does not allow creation of new flags dynamically
+#if no-rust
+
 test flag processor and skipflags
 
   $ hg init skipflags
@@ -363,3 +367,4 @@
   [1]
 
   $ hg verify --config verify.skipflags=2147483647 -q
+#endif
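
The new #if no-rust guard exists because the guarded test registers an ad-hoc revlog flag at runtime, something a pure-Python index tolerates (the flag table is mutable module state) but the rust index, with its fixed flag table, does not. A minimal sketch of that kind of runtime registration, written in the same heredoc style these tests use; the flag bit and the extension name are illustrative, only the flagutil helpers are real:

  $ cat << EOF > $TESTTMP/ext/fakeflag.py
  > from mercurial import util
  > from mercurial.revlogutils import flagutil
  > REVIDX_FAKE = 1 << 3  # illustrative, test-only flag bit
  > def extsetup(ui):
  >     # teach the revlog layer about the new bit...
  >     flagutil.REVIDX_KNOWN_FLAGS |= util.bitsfrom([REVIDX_FAKE])
  >     flagutil.REVIDX_FLAGS_ORDER.append(REVIDX_FAKE)
  >     # ...then attach (read, write, raw) processors that leave text alone
  >     flagutil.addflagprocessor(
  >         REVIDX_FAKE,
  >         (lambda rl, text: (text, True),
  >          lambda rl, text: (text, True),
  >          lambda rl, text: True))
  > EOF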