view mercurial/diffhelper.py @ 37947:3ea3c96ada54

encoding: introduce tagging type for non-lossy non-ASCII string This fixes the weird behavior of toutf8b(), which would convert a local string back to UTF-8 *only if* it was lossy in the system encoding. Before b7b26e54e37a "encoding: avoid localstr when a string can be encoded losslessly (issue2763)", all local strings were wrapped by the localstr class. I think this would justify the round-trip behavior of toutf8b(). ASCII strings are special-cased, so the cost of wrapping with safelocalstr is negligible. (with mercurial repo) $ export HGRCPATH=/dev/null HGPLAIN= HGENCODING=latin-1 $ hg log --time --config experimental.evolution=all > /dev/null (original) time: real 11.340 secs (user 11.290+0.000 sys 0.060+0.000) time: real 11.390 secs (user 11.300+0.000 sys 0.080+0.000) time: real 11.430 secs (user 11.360+0.000 sys 0.070+0.000) (this patch) time: real 11.200 secs (user 11.100+0.000 sys 0.100+0.000) time: real 11.370 secs (user 11.300+0.000 sys 0.070+0.000) time: real 11.190 secs (user 11.130+0.000 sys 0.060+0.000)
author Yuya Nishihara <yuya@tcha.org>
date Sun, 23 Apr 2017 13:15:30 +0900
parents 86e7a57449fa
children e7aa113b14f7
line wrap: on
line source

# diffhelper.py - helper routines for patch
#
# Copyright 2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _

from . import (
    error,
)

def addlines(fp, hunk, lena, lenb, a, b):
    """Read lines from fp into the hunk

    The hunk is parsed into two arrays, a and b. a gets the old state of
    the text, b gets the new state. The control char from the hunk is saved
    when inserting into a, but not b (for performance while deleting files.)
    """
    while True:
        todoa = lena - len(a)
        todob = lenb - len(b)
        num = max(todoa, todob)
        if num == 0:
            break
        for i in xrange(num):
            s = fp.readline()
            if not s:
                raise error.ParseError(_('incomplete hunk'))
            if s == "\\ No newline at end of file\n":
                fixnewline(hunk, a, b)
                continue
            if s == '\n' or s == '\r\n':
                # Some patches may be missing the control char
                # on empty lines. Supply a leading space.
                s = ' ' + s
            hunk.append(s)
            if s.startswith('+'):
                b.append(s[1:])
            elif s.startswith('-'):
                a.append(s)
            else:
                b.append(s[1:])
                a.append(s)

def fixnewline(hunk, a, b):
    """Fix up the last lines of a and b when the patch has no newline at EOF"""
    l = hunk[-1]
    # tolerate CRLF in last line
    if l.endswith('\r\n'):
        hline = l[:-2]
    else:
        hline = l[:-1]

    if hline.startswith((' ', '+')):
        b[-1] = hline[1:]
    if hline.startswith((' ', '-')):
        a[-1] = hline
    hunk[-1] = hline

def testhunk(a, b, bstart):
    """Compare the lines in a with the lines in b

    a is assumed to have a control char at the start of each line, this char
    is ignored in the compare.
    """
    alen = len(a)
    blen = len(b)
    if alen > blen - bstart or bstart < 0:
        return False
    for i in xrange(alen):
        if a[i][1:] != b[i + bstart]:
            return False
    return True