Mercurial > hg
view tests/testlib/ext-sidedata-2.py @ 50400:95acba2c29f6
encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings
Apparently the code uses "+=" with a bytes object, which is linear-time, so the
whole encoding is quadratic-time. This patch makes us use a bytearray object,
instead, which has a(n amortized-)constant-time append operation.
The encoding is still not particularly fast, but at least a 10MB file
takes tens of seconds, not many hours to encode.
author | Arseniy Alekseyev <aalekseyev@janestreet.com> |
---|---|
date | Mon, 06 Mar 2023 11:27:57 +0000 |
parents | 6000f5b25c9b |
children |
line wrap: on
line source
# coding: utf8
# ext-sidedata-2.py - small extension to test (differently) the sidedata logic
#
# Simulates a client for a complex sidedata exchange.
#
# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

import hashlib
import struct

from mercurial.revlogutils import sidedata as sidedatamod
from mercurial.revlogutils import constants

# Second element of the pair returned by both computers below.
# NOTE(review): presumably a (flags to add, flags to remove) pair -- confirm
# against the sidedata computer contract in mercurial.
NO_FLAGS = (0, 0)


def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
    """Store the revision length under ``SD_TEST1``.

    Works on a copy of ``sidedata`` so the caller's mapping is left untouched.
    When ``text`` is None, the revision content is read from ``revlog``.

    Returns the updated copy together with ``NO_FLAGS``.
    """
    updated = sidedata.copy()
    payload = revlog.revision(rev) if text is None else text
    # '>I': length packed as a big-endian unsigned 32-bit integer.
    updated[sidedatamod.SD_TEST1] = struct.pack('>I', len(payload))
    return updated, NO_FLAGS


def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
    """Store the SHA-256 digest of the revision under ``SD_TEST2``.

    Works on a copy of ``sidedata`` so the caller's mapping is left untouched.
    When ``text`` is None, the revision content is read from ``revlog``.

    Returns the updated copy together with ``NO_FLAGS``.
    """
    updated = sidedata.copy()
    payload = revlog.revision(rev) if text is None else text
    digest = hashlib.sha256(payload).digest()
    # '>32s': the 32 raw digest bytes, stored as-is.
    updated[sidedatamod.SD_TEST2] = struct.pack('>32s', digest)
    return updated, NO_FLAGS


def reposetup(ui, repo):
    """Register both test sidedata computers for every kind in ALL_KINDS."""
    # Sidedata keys happen to be the same as the categories, easier for
    # testing.
    computers = (
        (sidedatamod.SD_TEST1, compute_sidedata_1),
        (sidedatamod.SD_TEST2, compute_sidedata_2),
    )
    for kind in constants.ALL_KINDS:
        for category, computer in computers:
            # NOTE(review): the trailing 0 is passed through unchanged from
            # the original call; its meaning is defined by
            # register_sidedata_computer, which is not visible here.
            repo.register_sidedata_computer(
                kind,
                category,
                (category,),
                computer,
                0,
            )