contrib/bdiff-torture.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Sun, 05 Apr 2020 13:12:05 +0200
changeset 44863 640d5b3bd060
parent 43076 2372284d9457
child 48966 6000f5b25c9b
permissions -rw-r--r--
nodemap: also use persistent nodemap for manifest The manifest as a different usage pattern than the changelog. First, while the lookup in changelog are not garanteed to match, the lookup in the manifest nodemap come from changelog and will exist in the manifest. In addition, looking up a manifest almost always result in unpacking a manifest an operation that rarely come cheap. Nevertheless, using a persistent nodemap provide a significant gain for some operations. For our measurementw, we use `hg cat --rev REV FILE` on the our reference mozilla-try. On this repository the persistent nodemap cache is about 29 MB in side for a total store side of 11,988 MB File with large history (file: b2g/config/gaia.json, revision: 195a1146daa0) no optimisation: 0.358s using mmap for index: 0.297s (-0.061s) persistent nodemap for changelog only: 0.275s (-0.024s) persistent nodemap for manifest too: 0.258s (-0.017s) File with small history (file: .hgignore, revision: 195a1146daa0) no optimisation: 0.377s using mmap for index: 0.296s (-0.061s) persistent nodemap for changelog only: 0.274s (-0.022s) persistent nodemap for manifest too: 0.257s (-0.017s) Same file but using a revision (8ba995b74e18) with a smaller manifest (3944829 bytes vs 10 bytes) no optimisation: 0.192s (-0.185s) using mmap for index: 0.131s (-0.061s) persistent nodemap for changelog only: 0.106s (-0.025s) persistent nodemap for manifest too: 0.087s (-0.019s) Differential Revision: https://phab.mercurial-scm.org/D8410

# Randomized torture test generation for bdiff

from __future__ import absolute_import, print_function
import random
import sys

from mercurial import (
    mdiff,
    pycompat,
)


def reducetest(a, b):
    tries = 0
    reductions = 0
    print("reducing...")
    while tries < 1000:
        a2 = (
            "\n".join(l for l in a.splitlines() if random.randint(0, 100) > 0)
            + "\n"
        )
        b2 = (
            "\n".join(l for l in b.splitlines() if random.randint(0, 100) > 0)
            + "\n"
        )
        if a2 == a and b2 == b:
            continue
        if a2 == b2:
            continue
        tries += 1

        try:
            test1(a, b)
        except Exception:
            reductions += 1
            tries = 0
            a = a2
            b = b2

    print("reduced:", reductions, len(a) + len(b), repr(a), repr(b))
    try:
        test1(a, b)
    except Exception as inst:
        print("failed:", inst)

    sys.exit(0)


def test1(a, b):
    d = mdiff.textdiff(a, b)
    if not d:
        raise ValueError("empty")
    c = mdiff.patches(a, [d])
    if c != b:
        raise ValueError("bad")


def testwrap(a, b):
    try:
        test1(a, b)
        return
    except Exception as inst:
        print("exception:", inst)
    reducetest(a, b)


def test(a, b):
    testwrap(a, b)
    testwrap(b, a)


def rndtest(size, noise):
    a = []
    src = "                aaaaaaaabbbbccd"
    for x in pycompat.xrange(size):
        a.append(src[random.randint(0, len(src) - 1)])

    while True:
        b = [c for c in a if random.randint(0, 99) > noise]
        b2 = []
        for c in b:
            b2.append(c)
            while random.randint(0, 99) < noise:
                b2.append(src[random.randint(0, len(src) - 1)])
        if b2 != a:
            break

    a = "\n".join(a) + "\n"
    b = "\n".join(b2) + "\n"

    test(a, b)


maxvol = 10000
startsize = 2
while True:
    size = startsize
    count = 0
    while size < maxvol:
        print(size)
        volume = 0
        while volume < maxvol:
            rndtest(size, 2)
            volume += size
            count += 2
        size *= 2
    maxvol *= 4
    startsize *= 4