diff: do not split function name if character encoding is unknown
Only ASCII strings can be split reliably at an arbitrary byte position, so
let's just leave a long name containing multi-byte sequences untruncated. It's
probably less bad than putting an invalid byte sequence into a diff.
This doesn't try to split off the leading ASCII slice from a multi-byte
sequence, because a combining character may follow it.
--- a/mercurial/mdiff.py Sun Feb 25 11:20:35 2018 +0900
+++ b/mercurial/mdiff.py Fri Feb 23 23:09:58 2018 +0900
@@ -13,6 +13,7 @@
from .i18n import _
from . import (
+ encoding,
error,
policy,
pycompat,
@@ -348,7 +349,11 @@
# alphanumeric char.
for i in xrange(astart - 1, lastpos - 1, -1):
if l1[i][0:1].isalnum():
- func = ' ' + l1[i].rstrip()[:40]
+ func = b' ' + l1[i].rstrip()
+ # split long function name if ASCII. otherwise we have no
+ # idea where the multi-byte boundary is, so just leave it.
+ if encoding.isasciistr(func):
+ func = func[:41]
lastfunc[1] = func
break
# by recording this hunk's starting point as the next place to
--- a/tests/test-diff-unified.t Sun Feb 25 11:20:35 2018 +0900
+++ b/tests/test-diff-unified.t Fri Feb 23 23:09:58 2018 +0900
@@ -386,3 +386,73 @@
}
$ cd ..
+
+Long function names should be abbreviated, but multi-byte characters shouldn't
+be broken up
+
+ $ hg init longfunc
+ $ cd longfunc
+
+ >>> with open('a', 'wb') as f:
+ ... f.write(b'a' * 39 + b'bb' + b'\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b' 0 b\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b'a' * 39 + b'\xc3\xa0' + b'\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b' 0 a with grave (single code point)\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b'a' * 39 + b'a\xcc\x80' + b'\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b' 0 a with grave (composition)\n')
+ ... f.write(b' .\n' * 3)
+ $ hg ci -qAm0
+
+ >>> with open('a', 'wb') as f:
+ ... f.write(b'a' * 39 + b'bb' + b'\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b' 1 b\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b'a' * 39 + b'\xc3\xa0' + b'\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b' 1 a with grave (single code point)\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b'a' * 39 + b'a\xcc\x80' + b'\n')
+ ... f.write(b' .\n' * 3)
+ ... f.write(b' 1 a with grave (composition)\n')
+ ... f.write(b' .\n' * 3)
+ $ hg ci -m1
+
+ $ hg diff -c1 --nodates --show-function
+ diff -r 3e92dd6fa812 -r a256341606cb a
+ --- a/a
+ +++ b/a
+ @@ -2,7 +2,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab
+ .
+ .
+ .
+ - 0 b
+ + 1 b
+ .
+ .
+ .
+ @@ -10,7 +10,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\xc3\xa0 (esc)
+ .
+ .
+ .
+ - 0 a with grave (single code point)
+ + 1 a with grave (single code point)
+ .
+ .
+ .
+ @@ -18,7 +18,7 @@ aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\xcc\x80 (esc)
+ .
+ .
+ .
+ - 0 a with grave (composition)
+ + 1 a with grave (composition)
+ .
+ .
+ .
+
+ $ cd ..