view mercurial/pure/bdiff.py @ 51934:09f3a6790e56

interfaces: add the optional `bdiff.xdiffblocks()` method PyCharm flagged where this was called on the protocol class in `mdiff.py` in the previous commit, but pytype completely missed it. PyCharm is correct here, but I'm committing this separately to highlight this potential problem- some of the implementations don't implement _all_ of the methods the others do, and there's not a great way to indicate on a protocol class that a method or attribute is optional- that's kinda the opposite of what static typing is about. Making the method an `Optional[Callable]` attribute works here, and keeps both PyCharm and pytype happy, and the generated `mdiff.pyi` and `modules.pyi` look reasonable. We might be getting a little lucky, because the method isn't invoked directly- it is returned from another method that selects which block function to use. Except since it is declared on the protocol class, every module needs this attribute (in theory, but in practice this doesn't seem to be checked), so the check for it on the module has to change from `hasattr()` to `getattr(..., None)`. We defer defining the optional attrs to the type checking phase as an extra precaution- that way it isn't an attr with a `None` value at runtime if someone is still using `hasattr()`. As to why pytype missed this, I have no clue. The generated `mdiff.pyi` even has the global variable typed as `bdiff: intmod.BDiff`, so uses of it really should comply with what is on the class, protocol class or not.
author Matt Harbison <matt_harbison@yahoo.com>
date Sun, 29 Sep 2024 02:03:20 -0400
parents f4733654f144
children
line wrap: on
line source

# bdiff.py - Python implementation of bdiff.c
#
# Copyright 2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import annotations

import difflib
import re
import struct
import typing

from typing import (
    List,
    Optional,
    Tuple,
)

from ..interfaces import (
    modules as intmod,
)


def splitnewlines(text: bytes) -> List[bytes]:
    '''like str.splitlines, but only split on newlines.'''
    lines = [l + b'\n' for l in text.split(b'\n')]
    if lines:
        if lines[-1] == b'\n':
            lines.pop()
        else:
            lines[-1] = lines[-1][:-1]
    return lines


def _normalizeblocks(
    a: List[bytes], b: List[bytes], blocks
) -> List[Tuple[int, int, int]]:
    prev = None
    r = []
    for curr in blocks:
        if prev is None:
            prev = curr
            continue
        shift = 0

        a1, b1, l1 = prev
        a1end = a1 + l1
        b1end = b1 + l1

        a2, b2, l2 = curr
        a2end = a2 + l2
        b2end = b2 + l2
        if a1end == a2:
            while (
                a1end + shift < a2end and a[a1end + shift] == b[b1end + shift]
            ):
                shift += 1
        elif b1end == b2:
            while (
                b1end + shift < b2end and a[a1end + shift] == b[b1end + shift]
            ):
                shift += 1
        r.append((a1, b1, l1 + shift))
        prev = a2 + shift, b2 + shift, l2 - shift

    if prev is not None:
        r.append(prev)

    return r


def bdiff(a: bytes, b: bytes) -> bytes:
    a = bytes(a).splitlines(True)
    b = bytes(b).splitlines(True)

    if not a:
        s = b"".join(b)
        return s and (struct.pack(b">lll", 0, 0, len(s)) + s)

    bin = []
    p = [0]
    for i in a:
        p.append(p[-1] + len(i))

    d = difflib.SequenceMatcher(None, a, b).get_matching_blocks()
    d = _normalizeblocks(a, b, d)
    la = 0
    lb = 0
    for am, bm, size in d:
        s = b"".join(b[lb:bm])
        if am > la or s:
            bin.append(struct.pack(b">lll", p[la], p[am], len(s)) + s)
        la = am + size
        lb = bm + size

    return b"".join(bin)


def blocks(a: bytes, b: bytes) -> List[Tuple[int, int, int, int]]:
    an = splitnewlines(a)
    bn = splitnewlines(b)
    d = difflib.SequenceMatcher(None, an, bn).get_matching_blocks()
    d = _normalizeblocks(an, bn, d)
    return [(i, i + n, j, j + n) for (i, j, n) in d]


def fixws(text: bytes, allws: bool) -> bytes:
    if allws:
        text = re.sub(b'[ \t\r]+', b'', text)
    else:
        text = re.sub(b'[ \t\r]+', b' ', text)
        text = text.replace(b' \n', b'\n')
    return text


# In order to adhere to the module protocol, these functions must be visible to
# the type checker, though they aren't actually implemented by this
# implementation of the module protocol.  Callers are responsible for
# checking that the implementation is available before using them.
if typing.TYPE_CHECKING:
    xdiffblocks: Optional[intmod.BDiffBlocksFnc] = None