Mercurial > hg
view mercurial/cffi/bdiff.py @ 50317:af776c3d5c3e stable
debugdeltachain: stop summing the same chain over and over
Before this patch, delta chain size was computed from scratch for each chain,
disregarding the fact very likely already computed the same of length-1 prefix
for another revisions.
We not cache delta chain size and shortcut the computation when we see them.
Just for my mercurial-devel clone, this move the computation from about 17.5
second to about 4.8 seconds.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 21 Mar 2023 15:44:38 +0000 |
parents | 594fc56c0af7 |
children | ecc3a893979d |
line wrap: on
line source
# bdiff.py - CFFI implementation of bdiff.c # # Copyright 2016 Maciej Fijalkowski <fijall@gmail.com> # # This software may be used and distributed according to the terms of the # GNU General Public License version 2 or any later version. import struct from typing import ( List, Tuple, ) from ..pure.bdiff import * from . import _bdiff # pytype: disable=import-error ffi = _bdiff.ffi lib = _bdiff.lib def blocks(sa: bytes, sb: bytes) -> List[Tuple[int, int, int, int]]: a = ffi.new(b"struct bdiff_line**") b = ffi.new(b"struct bdiff_line**") ac = ffi.new(b"char[]", str(sa)) bc = ffi.new(b"char[]", str(sb)) l = ffi.new(b"struct bdiff_hunk*") try: an = lib.bdiff_splitlines(ac, len(sa), a) bn = lib.bdiff_splitlines(bc, len(sb), b) if not a[0] or not b[0]: raise MemoryError count = lib.bdiff_diff(a[0], an, b[0], bn, l) if count < 0: raise MemoryError rl = [(0, 0, 0, 0)] * count h = l.next i = 0 while h: rl[i] = (h.a1, h.a2, h.b1, h.b2) h = h.next i += 1 finally: lib.free(a[0]) lib.free(b[0]) lib.bdiff_freehunks(l.next) return rl def bdiff(sa: bytes, sb: bytes) -> bytes: a = ffi.new(b"struct bdiff_line**") b = ffi.new(b"struct bdiff_line**") ac = ffi.new(b"char[]", str(sa)) bc = ffi.new(b"char[]", str(sb)) l = ffi.new(b"struct bdiff_hunk*") try: an = lib.bdiff_splitlines(ac, len(sa), a) bn = lib.bdiff_splitlines(bc, len(sb), b) if not a[0] or not b[0]: raise MemoryError count = lib.bdiff_diff(a[0], an, b[0], bn, l) if count < 0: raise MemoryError rl = [] h = l.next la = lb = 0 while h: if h.a1 != la or h.b1 != lb: lgt = (b[0] + h.b1).l - (b[0] + lb).l rl.append( struct.pack( b">lll", (a[0] + la).l - a[0].l, (a[0] + h.a1).l - a[0].l, lgt, ) ) rl.append(str(ffi.buffer((b[0] + lb).l, lgt))) la = h.a2 lb = h.b2 h = h.next finally: lib.free(a[0]) lib.free(b[0]) lib.bdiff_freehunks(l.next) return b"".join(rl)