view mercurial/cffi/bdiff.py @ 50317:af776c3d5c3e stable

debugdeltachain: stop summing the same chain over and over Before this patch, delta chain size was computed from scratch for each chain, disregarding the fact very likely already computed the same of length-1 prefix for another revisions. We not cache delta chain size and shortcut the computation when we see them. Just for my mercurial-devel clone, this move the computation from about 17.5 second to about 4.8 seconds.
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 21 Mar 2023 15:44:38 +0000
parents 594fc56c0af7
children ecc3a893979d
line wrap: on
line source

# bdiff.py - CFFI implementation of bdiff.c
#
# Copyright 2016 Maciej Fijalkowski <fijall@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import struct

from typing import (
    List,
    Tuple,
)

from ..pure.bdiff import *
from . import _bdiff  # pytype: disable=import-error

ffi = _bdiff.ffi
lib = _bdiff.lib


def blocks(sa: bytes, sb: bytes) -> List[Tuple[int, int, int, int]]:
    a = ffi.new(b"struct bdiff_line**")
    b = ffi.new(b"struct bdiff_line**")
    ac = ffi.new(b"char[]", str(sa))
    bc = ffi.new(b"char[]", str(sb))
    l = ffi.new(b"struct bdiff_hunk*")
    try:
        an = lib.bdiff_splitlines(ac, len(sa), a)
        bn = lib.bdiff_splitlines(bc, len(sb), b)
        if not a[0] or not b[0]:
            raise MemoryError
        count = lib.bdiff_diff(a[0], an, b[0], bn, l)
        if count < 0:
            raise MemoryError
        rl = [(0, 0, 0, 0)] * count
        h = l.next
        i = 0
        while h:
            rl[i] = (h.a1, h.a2, h.b1, h.b2)
            h = h.next
            i += 1
    finally:
        lib.free(a[0])
        lib.free(b[0])
        lib.bdiff_freehunks(l.next)
    return rl


def bdiff(sa: bytes, sb: bytes) -> bytes:
    a = ffi.new(b"struct bdiff_line**")
    b = ffi.new(b"struct bdiff_line**")
    ac = ffi.new(b"char[]", str(sa))
    bc = ffi.new(b"char[]", str(sb))
    l = ffi.new(b"struct bdiff_hunk*")
    try:
        an = lib.bdiff_splitlines(ac, len(sa), a)
        bn = lib.bdiff_splitlines(bc, len(sb), b)
        if not a[0] or not b[0]:
            raise MemoryError
        count = lib.bdiff_diff(a[0], an, b[0], bn, l)
        if count < 0:
            raise MemoryError
        rl = []
        h = l.next
        la = lb = 0
        while h:
            if h.a1 != la or h.b1 != lb:
                lgt = (b[0] + h.b1).l - (b[0] + lb).l
                rl.append(
                    struct.pack(
                        b">lll",
                        (a[0] + la).l - a[0].l,
                        (a[0] + h.a1).l - a[0].l,
                        lgt,
                    )
                )
                rl.append(str(ffi.buffer((b[0] + lb).l, lgt)))
            la = h.a2
            lb = h.b2
            h = h.next

    finally:
        lib.free(a[0])
        lib.free(b[0])
        lib.bdiff_freehunks(l.next)
    return b"".join(rl)