view tests/testlib/ext-sidedata-3.py @ 50400:95acba2c29f6

encoding: avoid quadratic time complexity when json-encoding non-UTF8 strings. Apparently the code uses "+=" with a bytes object, which is a linear-time operation, so the whole encoding is quadratic-time. This patch makes us use a bytearray object instead, which has an (amortized) constant-time append operation. The encoding is still not particularly fast, but at least a 10MB file takes tens of seconds, not many hours, to encode.
author Arseniy Alekseyev <aalekseyev@janestreet.com>
date Mon, 06 Mar 2023 11:27:57 +0000
parents 6000f5b25c9b
children
line wrap: on
line source

# coding: utf8
# ext-sidedata-3.py - small extension to test (differently still) the sidedata
# logic
#
# Simulates a client for a complex sidedata exchange.
#
# Copyright 2021 Raphaël Gomès <rgomes@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import hashlib
import struct

from mercurial import (
    extensions,
    revlog,
)

from mercurial.revlogutils import sidedata as sidedatamod
from mercurial.revlogutils import constants

# Second element of the tuple returned by every sidedata computer in this file.
# NOTE(review): presumably a (flags to add, flags to remove) pair with nothing
# set -- confirm against the revlog sidedata computer API.
NO_FLAGS = (0, 0)


def compute_sidedata_1(repo, revlog, rev, sidedata, text=None):
    """Return a copy of ``sidedata`` carrying the length of the revision text.

    ``repo`` is not used. When ``text`` is None, the text is obtained from
    ``revlog.revision(rev)``. The input ``sidedata`` mapping is left
    untouched; the entry is written to a copy under
    ``sidedatamod.SD_TEST1`` as a big-endian unsigned int ('>I').

    Returns a ``(new_sidedata, NO_FLAGS)`` tuple.
    """
    updated = sidedata.copy()
    payload = revlog.revision(rev) if text is None else text
    packed_length = struct.pack('>I', len(payload))
    updated[sidedatamod.SD_TEST1] = packed_length
    return updated, NO_FLAGS


def compute_sidedata_2(repo, revlog, rev, sidedata, text=None):
    """Return a copy of ``sidedata`` carrying the SHA-256 of the revision text.

    ``repo`` is not used. When ``text`` is None, the text is obtained from
    ``revlog.revision(rev)``. The input ``sidedata`` mapping is left
    untouched; the 32-byte digest is written to a copy under
    ``sidedatamod.SD_TEST2``, packed with the '>32s' format.

    Returns a ``(new_sidedata, NO_FLAGS)`` tuple.
    """
    updated = sidedata.copy()
    payload = revlog.revision(rev) if text is None else text
    digest = hashlib.sha256(payload).digest()
    updated[sidedatamod.SD_TEST2] = struct.pack('>32s', digest)
    return updated, NO_FLAGS


def compute_sidedata_3(repo, revlog, rev, sidedata, text=None):
    """Return a copy of ``sidedata`` carrying the SHA-384 of the revision text.

    ``repo`` is not used. When ``text`` is None, the text is obtained from
    ``revlog.revision(rev)``. The input ``sidedata`` mapping is left
    untouched; the 48-byte digest is written to a copy under
    ``sidedatamod.SD_TEST3``, packed with the '>48s' format.

    Returns a ``(new_sidedata, NO_FLAGS)`` tuple.
    """
    updated = sidedata.copy()
    payload = revlog.revision(rev) if text is None else text
    digest = hashlib.sha384(payload).digest()
    updated[sidedatamod.SD_TEST3] = struct.pack('>48s', digest)
    return updated, NO_FLAGS


def wrapaddrevision(
    orig, self, text, transaction, link, p1, p2, *args, **kwargs
):
    """Wrapper for ``addrevision`` that injects test sidedata.

    Starts from the caller's ``sidedata`` keyword argument (or an empty dict
    when it is missing or None), runs it through ``compute_sidedata_1`` and
    then ``compute_sidedata_2`` using ``text``, and forwards the result to
    ``orig`` as the ``sidedata`` keyword argument. The flags returned by the
    computers are ignored. ``compute_sidedata_3`` is not applied here.
    """
    base = kwargs.get('sidedata')
    if base is None:
        base = {}
    # The computers receive no repo and no rev: the text is passed directly.
    with_length, _ = compute_sidedata_1(None, self, None, base, text=text)
    with_digest, _ = compute_sidedata_2(
        None, self, None, with_length, text=text
    )
    kwargs['sidedata'] = with_digest
    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)


def extsetup(ui):
    """Install ``wrapaddrevision`` around ``revlog.revlog.addrevision``.

    ``ui`` is not used.
    """
    target_class = revlog.revlog
    extensions.wrapfunction(target_class, 'addrevision', wrapaddrevision)


def reposetup(ui, repo):
    """Register the three test sidedata computers and the wanted categories.

    For every kind in ``constants.ALL_KINDS``, each of ``SD_TEST1``,
    ``SD_TEST2`` and ``SD_TEST3`` is registered on ``repo`` with its matching
    ``compute_sidedata_*`` function. Only ``SD_TEST1`` and ``SD_TEST2`` are
    then registered as wanted sidedata. ``ui`` is not used.
    """
    # Sidedata keys happen to be the same as the categories, easier for testing.
    computers = (
        (sidedatamod.SD_TEST1, compute_sidedata_1),
        (sidedatamod.SD_TEST2, compute_sidedata_2),
        (sidedatamod.SD_TEST3, compute_sidedata_3),
    )
    for kind in constants.ALL_KINDS:
        for category, computer in computers:
            # NOTE(review): the trailing 0 is presumably the "flags"
            # argument -- confirm against register_sidedata_computer.
            repo.register_sidedata_computer(
                kind,
                category,
                (category,),
                computer,
                0,
            )
    for wanted in (sidedatamod.SD_TEST1, sidedatamod.SD_TEST2):
        repo.register_wanted_sidedata(wanted)