view mercurial/pure/diffhelpers.py @ 12717:89df79b3c011 stable

convert/darcs: support changelogs with bytes 0x7F-0xFF (issue2411) This is a followup to 4481f8a93c7a, which only fixed the conversion of patches with UTF-8 metadata. This patch allows a changelog to have any bytes with values 0x7F-0xFF. It parses the XML changelog as Latin-1 and uses converter_source.recode() to decode the data as UTF-8/Latin-1. Caveats: - Since the convert extension doesn't provide any way to specify the source encoding, users are still limited to UTF-8 and Latin-1. - etree will still complain if the changelog has bytes with values 0x00-0x19. XML only allows printable characters.
author Brodie Rao <brodie@bitheap.org>
date Fri, 01 Oct 2010 10:15:04 -0500
parents f61dced1367a
children 4f8067c94729
line wrap: on
line source

# diffhelpers.py - pure Python implementation of diffhelpers.c
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

def addlines(fp, hunk, lena, lenb, a, b):
    """Read the body of a diff hunk from fp into hunk, a and b.

    Lines are read with fp.readline() until a holds lena entries and b
    holds lenb entries.  Every stored line is appended to hunk.  A line
    starting with '-' is appended to a, a line starting with '+' is
    appended to b, and any other line is appended to both.  Entries in a
    keep their leading control character; entries in b have it removed.

    A "No newline at end of file" marker line is not stored; fix_newline()
    is called instead to adjust the previously stored line.

    Raises IndexError if fp is exhausted before the hunk is complete.
    Always returns 0.
    """
    while True:
        todoa = lena - len(a)
        todob = lenb - len(b)
        num = max(todoa, todob)
        # <= rather than ==: if both lists are already over-full there is
        # nothing to read, and looping on a negative count would never end.
        if num <= 0:
            break
        for _ in range(num):
            s = fp.readline()
            if not s:
                # readline() returns an empty string at end of input
                raise IndexError("incomplete hunk: unexpected end of input")
            if s == "\\ No newline at end of file\n":
                # the marker does not count towards a or b; the outer
                # loop recomputes how many lines are still missing
                fix_newline(hunk, a, b)
                continue
            if s == "\n" or s == "\r\n":
                # Some patches may be missing the control char
                # on empty lines. Supply a leading space.
                s = " " + s
            hunk.append(s)
            if s.startswith("+"):
                b.append(s[1:])
            elif s.startswith("-"):
                a.append(s)
            else:
                b.append(s[1:])
                a.append(s)
    return 0

def fix_newline(hunk, a, b):
    """Strip the line ending from the last hunk line and mirror it in a/b.

    The last entry of hunk loses a trailing CRLF if it has one, otherwise
    its final character (presumably the newline).  Depending on the
    first character of the trimmed line:

    - ' ' or '+': the last entry of b is replaced by the trimmed line
      without its first character;
    - ' ' or '-': the last entry of a is replaced by the trimmed line.

    The last entry of hunk is then replaced by the trimmed line.
    Always returns 0.
    """
    last = hunk[-1]
    # tolerate CRLF in last line
    cut = -1
    if last.endswith('\r\n'):
        cut = -2
    trimmed = last[:cut]
    marker = trimmed[0]

    if marker in (" ", "+"):
        b[-1] = trimmed[1:]
    if marker in (" ", "-"):
        a[-1] = trimmed
    hunk[-1] = trimmed
    return 0


def testhunk(a, b, bstart):
    alen = len(a)
    blen = len(b)
    if alen > blen - bstart:
        return -1
    for i in xrange(alen):
        if a[i][1:] != b[i + bstart]:
            return -1
    return 0