Mercurial > hg
view tests/testlib/ext-sidedata.py @ 51576:de5bf3fe0233
revset: stop serializing node when using "%ln"
Turning hundreds of thousands of nodes from node to hex and back can be slow… how
about we stop doing it?
In many cases where we are using node ids we should be using revision ids. However,
this is not a good reason to have a stupidly slow implementation of "%ln".
This caught my attention again because the phase discovery during push makes
extensive use of "%ln" on huge sets. In absolute terms, that phase discovery probably
should use "%ld" and needs to improve its algorithmic complexity, but improving
"%ln" seems simple and long overdue. This greatly speeds up `hg push` on
repositories with many drafts.
Here are some relevant poulpe benchmarks:
### data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog
# benchmark.name = hg.command.push
# bin-env-vars.hg.flavor = default
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.explicit-rev = all-out-heads
# benchmark.variants.issue6528 = disabled
# benchmark.variants.protocol = ssh
# benchmark.variants.reuse-external-delta-parent = default
## benchmark.variants.revs = any-1-extra-rev
before: 44.235070
after: 20.416329 (-53.85%, -23.82)
## benchmark.variants.revs = any-100-extra-rev
before: 49.234697
after: 26.519829 (-46.14%, -22.71)
### benchmark.name = hg.command.bundle
# bin-env-vars.hg.flavor = default
# bin-env-vars.hg.py-re2-module = default
# benchmark.variants.revs = all
# benchmark.variants.type = none-streamv2
## data-env-vars.name = heptapod-public-2024-03-25-zstd-sparse-revlog
before: 10.138396
after: 7.750458 (-23.55%, -2.39)
## data-env-vars.name = mercurial-public-2024-03-22-zstd-sparse-revlog
before: 1.263859
after: 0.700229 (-44.60%, -0.56)
## data-env-vars.name = mozilla-try-2023-03-22-zstd-sparse-revlog
before: 399.484481
after: 346.5089 (-13.26%, -52.98)
## data-env-vars.name = pypy-2024-03-22-zstd-sparse-revlog
before: 4.540080
after: 3.401700 (-25.07%, -1.14)
## data-env-vars.name = tryton-public-2024-03-22-zstd-sparse-revlog
before: 2.975765
after: 1.870798 (-37.13%, -1.10)
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Fri, 05 Apr 2024 11:05:54 +0200 |
parents | 7c2dc75cdc0f |
children |
line wrap: on
line source
# ext-sidedata.py - small extension to test the sidedata logic
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import hashlib
import struct

from mercurial.node import nullrev
from mercurial import (
    extensions,
    requirements,
    revlog,
)

from mercurial.upgrade_utils import engine as upgrade_engine

from mercurial.revlogutils import constants
from mercurial.revlogutils import sidedata


def _fill_test_sidedata(target, text):
    """Write the two test sidedata entries describing ``text`` into ``target``.

    ``target`` is a mutable mapping that is updated in place:

    * ``sidedata.SD_TEST1`` receives ``len(text)`` packed as ``'>I'``
    * ``sidedata.SD_TEST2`` receives the sha256 digest of ``text`` packed as
      ``'>32s'``
    """
    # arbitrary data, stored only so the tests have something to check
    target[sidedata.SD_TEST1] = struct.pack('>I', len(text))
    digest = hashlib.sha256(text).digest()
    target[sidedata.SD_TEST2] = struct.pack('>32s', digest)


def wrapaddrevision(
    orig, self, text, transaction, link, p1, p2, *args, **kwargs
):
    """Wrapper installed around ``revlog.revlog.addrevision`` by ``extsetup``.

    Makes sure the ``sidedata`` keyword argument is a dict (creating an empty
    one when it is missing or ``None``), adds the test entries computed from
    ``text`` to it, then delegates to ``orig`` with the same arguments.
    """
    entries = kwargs.get('sidedata')
    if entries is None:
        entries = kwargs['sidedata'] = {}
    _fill_test_sidedata(entries, text)
    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)


def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
    """Wrapper installed around ``revlog.revlog._revisiondata`` by ``extsetup``.

    Returns whatever ``orig`` returned.  When the checks are not skipped, the
    text is compared with the test sidedata stored for the revision.

    Raises ``RuntimeError`` when a stored length or sha256 digest is present
    and does not match the text.
    """
    text = orig(self, nodeorrev, *args, **kwargs)
    # looked up unconditionally, before any of the early exits below
    stored = self.sidedata(nodeorrev)

    # `computer` (see wrapget_sidedata_helpers) sets this flag while it reads
    # a revision, so that read is not checked
    if getattr(self, 'sidedatanocheck', False):
        return text
    # NOTE(review): the checks below only run when `has_side_data` is falsy;
    # confirm this is the intended polarity.
    if self.feature_config.has_side_data:
        return text
    # nothing to verify for the null revision / null node
    if nodeorrev == nullrev or nodeorrev == self.nullid:
        return text

    size_blob = stored.get(sidedata.SD_TEST1)
    if size_blob is not None:
        (recorded_size,) = struct.unpack('>I', size_blob)
        if len(text) != recorded_size:
            raise RuntimeError('text size mismatch')

    expected = stored.get(sidedata.SD_TEST2)
    actual = hashlib.sha256(text).digest()
    if expected is not None and actual != expected:
        raise RuntimeError('sha256 mismatch')
    return text


def wrapget_sidedata_helpers(orig, srcrepo, dstrepo):
    """Wrapper installed around ``upgrade_engine.get_sidedata_helpers``.

    Calls ``orig`` and asserts it produced no computers and no removers.  If
    ``dstrepo`` gains ``requirements.REVLOGV2_REQUIREMENT`` compared with
    ``srcrepo``, a test sidedata computer is registered on ``srcrepo`` under
    the ``b"whatever"`` category and that category is registered as wanted on
    ``dstrepo``.

    Returns ``sidedata.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)``.
    """
    _repo, computers, removers = orig(srcrepo, dstrepo)
    assert not (computers or removers)  # deal with composition later
    gained = dstrepo.requirements - srcrepo.requirements

    if requirements.REVLOGV2_REQUIREMENT in gained:

        def computer(repo, revlog, rev, old_sidedata):
            """Build the test sidedata for ``rev`` of ``revlog``.

            Returns ``(update, (0, 0))`` where ``update`` maps the two test
            keys to their packed values.
            """
            assert not old_sidedata  # not supported yet
            update = {}
            # disable the check in wrap_revisiondata for this one read
            revlog.sidedatanocheck = True
            try:
                text = revlog.revision(rev)
            finally:
                del revlog.sidedatanocheck
            _fill_test_sidedata(update, text)
            return update, (0, 0)

        srcrepo.register_sidedata_computer(
            constants.KIND_CHANGELOG,
            b"whatever",
            (sidedata.SD_TEST1, sidedata.SD_TEST2),
            computer,
            0,
        )
        dstrepo.register_wanted_sidedata(b"whatever")

    return sidedata.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)


def extsetup(ui):
    """Install the three wrappers defined in this extension."""
    wrappers = (
        (revlog.revlog, 'addrevision', wrapaddrevision),
        (revlog.revlog, '_revisiondata', wrap_revisiondata),
        (upgrade_engine, 'get_sidedata_helpers', wrapget_sidedata_helpers),
    )
    for container, name, wrapper in wrappers:
        extensions.wrapfunction(container, name, wrapper)


def reposetup(ui, repo):
    """Register both test sidedata keys as wanted on ``repo``."""
    # We don't register sidedata computers because we don't care within these
    # tests
    for key in (sidedata.SD_TEST1, sidedata.SD_TEST2):
        repo.register_wanted_sidedata(key)