view mercurial/cffi/bdiff.py @ 51683:5f37c36f36b9

revlog: use mmap by default is pre-population is available Using mmap has a great impact of memory usage on server, and a good impact on performance in multiple case. Now that we pre-populate memory mapping by default, there is case where it using mmap is slower. So we use it by default (if pre-population is available). Further work to reduce the performance impact of the pre-population will be done later. Some benchmark below (using the same setup as 522b4d729e89): As for 522b4d729e89 the impact on small repository like Mercurial or Pypy is tiny, ~1% best. However for large repositories we see some performance improvement without seeing the performance regression that we could have without pre-populate. ##### For netbeans ### data-env-vars.name = netbeans-2018-08-01-zstd-sparse-revlog ## benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # benchmark.variants.limit-rev = 1 # benchmark.variants.patch = yes no-mmap: 0.171579 mmap: 0.166311 (-3.07%, -0.01) # bin-env-vars.hg.flavor = default no-mmap: 0.170716 mmap: 0.165218 (-3.22%, -0.01) # benchmark.variants.patch = no # benchmark.variants.rev = tip no-mmap: 0.140862 mmap: 0.137566 (-2.34%, -0.00) ## benchmark.name = hg.command.unbundle # bin-env-vars.hg.flavor = rust # benchmark.variants.issue6528 = disabled # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev # benchmark.variants.source = unbundle no-mmap: 0.238038 mmap: 0.239912 no-populate: 0.cbd4c9 (+11.71%, +0.03) #### For Mozilla ### data-env-vars.name = mozilla-try-2019-02-18-ds2-pnm # benchmark.name = hg.command.log # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.limit-rev = 1 # benchmark.variants.patch = yes no-mmap: 0.258440 mmap: 0.237813 (-7.98%, -0.02) # benchmark.variants.limit-rev = 10 no-mmap: 1.235323 mmap: 1.213578 (-1.76%, -0.02) ## benchmark.name = hg.command.push # bin-env-vars.hg.flavor = rust # bin-env-vars.hg.py-re2-module = default # benchmark.variants.explicit-rev = none # benchmark.variants.issue6528 = disabled # benchmark.variants.protocol = ssh # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev no-mmap: 4.790135 mmap: 4.668971 (-2.53%, -0.12) no-populate: 4.841141 (+1.06%, +0.05) ### data-env-vars.name = mozilla-try-2019-02-18-zstd-sparse-revlog ## benchmark.name = hg.command.log # bin-env-vars.hg.flavor = default # benchmark.variants.limit-rev = 1000 # benchmark.variants.rev = tip no-mmap: 0.206187 mmap: 0.197348 (-4.29%, -0.01) ## benchmark.name = hg.command.push # bin-env-vars.hg.flavor = default # benchmark.variants.explicit-rev = none # benchmark.variants.issue6528 = disabled # benchmark.variants.protocol = ssh # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev no-mmap: 4.768259 mmap: 4.798632 no-populate: 4.953295 (+3.88%, +0.19) # benchmark.variants.revs = any-100-extra-rev no-mmap: 4.785946 mmap: 4.903618 no-populate: 5.014963 (+4.79%, +0.23) ## benchmark.name = hg.command.unbundle # bin-env-vars.hg.flavor = default # benchmark.variants.issue6528 = disabled # benchmark.variants.reuse-external-delta-parent = yes # benchmark.variants.revs = any-1-extra-rev # benchmark.variants.source = unbundle no-mmap: 1.400121 mmap: 1.423411 no-populate: 1.585365 (+13.23%, +0.19)
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Mon, 08 Jul 2024 15:48:34 +0200
parents 594fc56c0af7
children ecc3a893979d
line wrap: on
line source

# bdiff.py - CFFI implementation of bdiff.c
#
# Copyright 2016 Maciej Fijalkowski <fijall@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from typing import (
    List,
    Tuple,
)

from ..pure.bdiff import *
from . import _bdiff  # pytype: disable=import-error

ffi = _bdiff.ffi
lib = _bdiff.lib


def blocks(sa: bytes, sb: bytes) -> List[Tuple[int, int, int, int]]:
    a = ffi.new(b"struct bdiff_line**")
    b = ffi.new(b"struct bdiff_line**")
    ac = ffi.new(b"char[]", str(sa))
    bc = ffi.new(b"char[]", str(sb))
    l = ffi.new(b"struct bdiff_hunk*")
    try:
        an = lib.bdiff_splitlines(ac, len(sa), a)
        bn = lib.bdiff_splitlines(bc, len(sb), b)
        if not a[0] or not b[0]:
            raise MemoryError
        count = lib.bdiff_diff(a[0], an, b[0], bn, l)
        if count < 0:
            raise MemoryError
        rl = [(0, 0, 0, 0)] * count
        h = l.next
        i = 0
        while h:
            rl[i] = (h.a1, h.a2, h.b1, h.b2)
            h = h.next
            i += 1
    finally:
        lib.free(a[0])
        lib.free(b[0])
        lib.bdiff_freehunks(l.next)
    return rl


def bdiff(sa: bytes, sb: bytes) -> bytes:
    a = ffi.new(b"struct bdiff_line**")
    b = ffi.new(b"struct bdiff_line**")
    ac = ffi.new(b"char[]", str(sa))
    bc = ffi.new(b"char[]", str(sb))
    l = ffi.new(b"struct bdiff_hunk*")
    try:
        an = lib.bdiff_splitlines(ac, len(sa), a)
        bn = lib.bdiff_splitlines(bc, len(sb), b)
        if not a[0] or not b[0]:
            raise MemoryError
        count = lib.bdiff_diff(a[0], an, b[0], bn, l)
        if count < 0:
            raise MemoryError
        rl = []
        h = l.next
        la = lb = 0
        while h:
            if h.a1 != la or h.b1 != lb:
                lgt = (b[0] + h.b1).l - (b[0] + lb).l
                rl.append(
                    struct.pack(
                        b">lll",
                        (a[0] + la).l - a[0].l,
                        (a[0] + h.a1).l - a[0].l,
                        lgt,
                    )
                )
                rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
            la = h.a2
            lb = h.b2
            h = h.next

    finally:
        lib.free(a[0])
        lib.free(b[0])
        lib.bdiff_freehunks(l.next)
    return b"".join(rl)