Mercurial > hg
comparison mercurial/mdiff.py @ 36414:44c4a38bf563
diff: do not split function name if character encoding is unknown
Only ASCII text can be split reliably at an arbitrary byte position, so let's
just leave a long multi-byte sequence unsplit. That's probably less bad than
putting an invalid byte sequence into a diff.
This doesn't try to split off the leading ASCII slice of a multi-byte sequence,
because a combining character may follow it.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Fri, 23 Feb 2018 23:09:58 +0900 |
parents | 29dd37a418aa |
children | c6061cadb400 |
comparison
equal
deleted
inserted
replaced
36413:f493829b74dd | 36414:44c4a38bf563 |
---|---|
11 import struct | 11 import struct |
12 import zlib | 12 import zlib |
13 | 13 |
14 from .i18n import _ | 14 from .i18n import _ |
15 from . import ( | 15 from . import ( |
16 encoding, | |
16 error, | 17 error, |
17 policy, | 18 policy, |
18 pycompat, | 19 pycompat, |
19 util, | 20 util, |
20 ) | 21 ) |
346 # walk backwards from the start of the context up to the start of | 347 # walk backwards from the start of the context up to the start of |
347 # the previous hunk context until we find a line starting with an | 348 # the previous hunk context until we find a line starting with an |
348 # alphanumeric char. | 349 # alphanumeric char. |
349 for i in xrange(astart - 1, lastpos - 1, -1): | 350 for i in xrange(astart - 1, lastpos - 1, -1): |
350 if l1[i][0:1].isalnum(): | 351 if l1[i][0:1].isalnum(): |
351 func = ' ' + l1[i].rstrip()[:40] | 352 func = b' ' + l1[i].rstrip() |
353 # split long function name if ASCII. otherwise we have no | |
354 # idea where the multi-byte boundary is, so just leave it. | |
355 if encoding.isasciistr(func): | |
356 func = func[:41] | |
352 lastfunc[1] = func | 357 lastfunc[1] = func |
353 break | 358 break |
354 # by recording this hunk's starting point as the next place to | 359 # by recording this hunk's starting point as the next place to |
355 # start looking for function lines, we avoid reading any line in | 360 # start looking for function lines, we avoid reading any line in |
356 # the file more than once. | 361 # the file more than once. |