view mercurial/diffhelper.py @ 45020:697212a830fb stable

convert: bail out in Subversion source if encountering non-ASCII HTTP(S) URL Before this patch, in the tested case, urllib raised `httplib.InvalidURL: URL can't contain control characters. '/\xff/!svn/ver/0/.svn' (found at least '\xff')`, which resulted in that the URL was never recognized as a Subversion repository. This patch adds a check that bails out if the URL contains non-ASCII characters. The warning is not overly user-friendly, but giving the user something to type into a search engine is definitively better than not explaining why the repository was not recognized. We could support non-ASCII chracters by quoting them before passing them to urllib. However, we would want to be compatible with what the `svn` command does, which converts the URL from the locale encoding to UTF-8, percent-encodes it and sends it to the server. If the locale encoding is not UTF-8, the behavior is IMHO not very intuitive, as the `svn` command may send different (percent-encoded) octets than what was passed on the console. Instead of copying this behavior, we better leave it forbidden.
author Manuel Jacob <me@manueljacob.de>
date Tue, 30 Jun 2020 04:55:52 +0200
parents 687b865b95ad
children 10f48720ef95
line wrap: on
line source

# diffhelper.py - helper routines for patch
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _

from . import (
    error,
    pycompat,
)


def addlines(fp, hunk, lena, lenb, a, b):
    """Read lines from fp into the hunk

    The hunk is parsed into two arrays, a and b. a gets the old state of
    the text, b gets the new state. The control char from the hunk is saved
    when inserting into a, but not b (for performance while deleting files.)
    """
    while True:
        todoa = lena - len(a)
        todob = lenb - len(b)
        num = max(todoa, todob)
        if num == 0:
            break
        for i in pycompat.xrange(num):
            s = fp.readline()
            if not s:
                raise error.ParseError(_(b'incomplete hunk'))
            if s == b"\\ No newline at end of file\n":
                fixnewline(hunk, a, b)
                continue
            if s == b'\n' or s == b'\r\n':
                # Some patches may be missing the control char
                # on empty lines. Supply a leading space.
                s = b' ' + s
            hunk.append(s)
            if s.startswith(b'+'):
                b.append(s[1:])
            elif s.startswith(b'-'):
                a.append(s)
            else:
                b.append(s[1:])
                a.append(s)


def fixnewline(hunk, a, b):
    """Fix up the last lines of a and b when the patch has no newline at EOF"""
    l = hunk[-1]
    # tolerate CRLF in last line
    if l.endswith(b'\r\n'):
        hline = l[:-2]
    else:
        hline = l[:-1]

    if hline.startswith((b' ', b'+')):
        b[-1] = hline[1:]
    if hline.startswith((b' ', b'-')):
        a[-1] = hline
    hunk[-1] = hline


def testhunk(a, b, bstart):
    """Compare the lines in a with the lines in b

    a is assumed to have a control char at the start of each line, this char
    is ignored in the compare.
    """
    alen = len(a)
    blen = len(b)
    if alen > blen - bstart or bstart < 0:
        return False
    for i in pycompat.xrange(alen):
        if a[i][1:] != b[i + bstart]:
            return False
    return True