view mercurial/pure/bdiff.py @ 47072:4c041c71ec01

revlog: introduce an explicit tracking of what the revlog is about Since the dawn of time, people have been forced to rely to lossy introspection of the index filename to determine what the purpose and role of the revlog they encounter is. This is hacky, error prone, inflexible, abstraction-leaky, <insert-your-own-complaints-here>. In f63299ee7e4d Raphaël introduced a new attribute to track this information: `revlog_kind`. However it is initialized in an odd place and various instances end up not having it set. In addition is only tracking some of the information we end up having to introspect in various pieces of code. So we add a new attribute that holds more data and is more strictly enforced. This work is done in collaboration with Raphaël. The `revlog_kind` one will be removed/adapted in the next changeset. We expect to be able to clean up various existing piece of code and to simplify coming work around the newer revlog format. Differential Revision: https://phab.mercurial-scm.org/D10352
author Pierre-Yves David <pierre-yves.david@octobus.net>
date Tue, 06 Apr 2021 05:20:24 +0200
parents d4ba4d51f85f
children 6000f5b25c9b
line wrap: on
line source

# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import difflib
import re
import struct


def splitnewlines(text):
    '''like str.splitlines, but only split on newlines.'''
    lines = [l + b'\n' for l in text.split(b'\n')]
    if lines:
        if lines[-1] == b'\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines


def _normalizeblocks(a, b, blocks):
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            while (
                a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]
            ):
                shift += 1
        elif b1end == b2:
            while (
                b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]
            ):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift

    if prev is not None:
        r.append(prev)

    return r


def bdiff(a, b):
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        s = b"".join(b)
        return s and (struct.pack(b">lll", 0, 0, len(s)) + s)

    bin = []
    p = [0]
    for i in a:
        p.append(p[-1] + len(i))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        s = b"".join(b[lb:bm])
        if am > la or s:
            bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return b"".join(bin)


def blocks(a, b):
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    return [(i, i + n, j, j + n) for (i, j, n) in d]


def fixws(text, allws):
    if allws:
        text = re.sub(b'[ \t\r]+', b'', text)
    else:
        text = re.sub(b'[ \t\r]+', b' ', text)
        text = text.replace(b' \n', b'\n')
    return text