comparison mercurial/mdiff.py @ 36414:44c4a38bf563

diff: do not split function name if character encoding is unknown. Only ASCII characters can be split reliably at any byte position, so let's just leave a long multi-byte sequence unsplit. It's probably less bad than putting an invalid byte sequence into a diff. This doesn't try to split off the first ASCII slice from a multi-byte sequence because a combining character may follow.
author Yuya Nishihara <yuya@tcha.org>
date Fri, 23 Feb 2018 23:09:58 +0900
parents 29dd37a418aa
children c6061cadb400
comparison
equal deleted inserted replaced
36413:f493829b74dd 36414:44c4a38bf563
11 import struct 11 import struct
12 import zlib 12 import zlib
13 13
14 from .i18n import _ 14 from .i18n import _
15 from . import ( 15 from . import (
16 encoding,
16 error, 17 error,
17 policy, 18 policy,
18 pycompat, 19 pycompat,
19 util, 20 util,
20 ) 21 )
346 # walk backwards from the start of the context up to the start of 347 # walk backwards from the start of the context up to the start of
347 # the previous hunk context until we find a line starting with an 348 # the previous hunk context until we find a line starting with an
348 # alphanumeric char. 349 # alphanumeric char.
349 for i in xrange(astart - 1, lastpos - 1, -1): 350 for i in xrange(astart - 1, lastpos - 1, -1):
350 if l1[i][0:1].isalnum(): 351 if l1[i][0:1].isalnum():
351 func = ' ' + l1[i].rstrip()[:40] 352 func = b' ' + l1[i].rstrip()
353 # split long function name if ASCII. otherwise we have no
354 # idea where the multi-byte boundary is, so just leave it.
355 if encoding.isasciistr(func):
356 func = func[:41]
352 lastfunc[1] = func 357 lastfunc[1] = func
353 break 358 break
354 # by recording this hunk's starting point as the next place to 359 # by recording this hunk's starting point as the next place to
355 # start looking for function lines, we avoid reading any line in 360 # start looking for function lines, we avoid reading any line in
356 # the file more than once. 361 # the file more than once.