view tests/testlib/ext-sidedata.py @ 51576:de5bf3fe0233

revset: stop serializing node when using "%ln" Turning hundred of thousand of node from node to hex and back can be slow… what about we stop doing it? In many case were we are using node id we should be using revision id. However this is not a good reason to have a stupidly slow implementation of "%ln". This caught my attention again because the phase discovery during push make an extensive use of "%ln" or huge set. In absolute, that phase discovery probably should use "%ld" and need to improves its algorithmic complexity, but improving "%ln" seems simple and long overdue. This greatly speeds up `hg push` on repository with many drafts. Here are some relevant poulpe benchmarks: ### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog # benchmark.name = hg.command.push # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.explicit-rev = all-out-heads # benchmark.variants.issue6528 = disabled # benchmark.variants.protocol = ssh # benchmark.variants.reuse-external-delta-parent = default ## benchmark.variants.revs = any-1-extra-rev before: 44.235070 after: 20.416329 (-53.85%, -23.82) ## benchmark.variants.revs = any-100-extra-rev before: 49.234697 after: 26.519829 (-46.14%, -22.71) ### benchmark.name = hg.command.bundle # bin-env-vars.hg.flavor = default # bin-env-vars.hg.py-re2-module = default # benchmark.variants.revs = all # benchmark.variants.type = none-streamv2 ## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog before: 10.138396 after: 7.750458 (-23.55%, -2.39) ## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog before: 1.263859 after: 0.700229 (-44.60%, -0.56) ## data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog before: 399.484481 after: 346.5089 (-13.26%, -52.98) ## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog before: 4.540080 after: 3.401700 (-25.07%, -1.14) ## data-env-vars.name = tryton-public-2024-03-22-zstd-sparse-revlog before: 2.975765 after: 1.870798 (-37.13%, -1.10)
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Fri, 05 Apr 2024 11:05:54 +0200
parents 7c2dc75cdc0f
children
line wrap: on
line source

# ext-sidedata.py - small extension to test the sidedata logic
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import hashlib
import struct

from mercurial.node import nullrev
from mercurial import (
    extensions,
    requirements,
    revlog,
)

from mercurial.upgrade_utils import engine as upgrade_engine

from mercurial.revlogutils import constants
from mercurial.revlogutils import sidedata


def wrapaddrevision(
    orig, self, text, transaction, link, p1, p2, *args, **kwargs
):
    if kwargs.get('sidedata') is None:
        kwargs['sidedata'] = {}
    sd = kwargs['sidedata']
    ## let's store some arbitrary data just for testing
    # text length
    sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
    # and sha2 hashes
    sha256 = hashlib.sha256(text).digest()
    sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)


def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
    text = orig(self, nodeorrev, *args, **kwargs)
    sd = self.sidedata(nodeorrev)
    if getattr(self, 'sidedatanocheck', False):
        return text
    if self.feature_config.has_side_data:
        return text
    if nodeorrev != nullrev and nodeorrev != self.nullid:
        cat1 = sd.get(sidedata.SD_TEST1)
        if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
            raise RuntimeError('text size mismatch')
        expected = sd.get(sidedata.SD_TEST2)
        got = hashlib.sha256(text).digest()
        if expected is not None and got != expected:
            raise RuntimeError('sha256 mismatch')
    return text


def wrapget_sidedata_helpers(orig, srcrepo, dstrepo):
    repo, computers, removers = orig(srcrepo, dstrepo)
    assert not computers and not removers  # deal with composition later
    addedreqs = dstrepo.requirements - srcrepo.requirements

    if requirements.REVLOGV2_REQUIREMENT in addedreqs:

        def computer(repo, revlog, rev, old_sidedata):
            assert not old_sidedata  # not supported yet
            update = {}
            revlog.sidedatanocheck = True
            try:
                text = revlog.revision(rev)
            finally:
                del revlog.sidedatanocheck
            ## let's store some arbitrary data just for testing
            # text length
            update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
            # and sha2 hashes
            sha256 = hashlib.sha256(text).digest()
            update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
            return update, (0, 0)

        srcrepo.register_sidedata_computer(
            constants.KIND_CHANGELOG,
            b"whatever",
            (sidedata.SD_TEST1, sidedata.SD_TEST2),
            computer,
            0,
        )
        dstrepo.register_wanted_sidedata(b"whatever")

    return sidedata.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)


def extsetup(ui):
    extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
    extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
    extensions.wrapfunction(
        upgrade_engine, 'get_sidedata_helpers', wrapget_sidedata_helpers
    )


def reposetup(ui, repo):
    # We don't register sidedata computers because we don't care within these
    # tests
    repo.register_wanted_sidedata(sidedata.SD_TEST1)
    repo.register_wanted_sidedata(sidedata.SD_TEST2)