util: wrap lines with multi-byte characters correctly (issue2943)
This re-introduces the unicode conversion that was lost in
d320e70442a5 five years ago, which carried the comment:
To avoid corrupting multi-byte characters in line, we must wrap
a Unicode string instead of a bytestring.
--- a/mercurial/util.py Mon Aug 08 11:34:52 2011 +0100
+++ b/mercurial/util.py Sat Aug 06 23:52:20 2011 +0200
@@ -1171,16 +1171,14 @@
def __init__(self, **kwargs):
textwrap.TextWrapper.__init__(self, **kwargs)
- def _cutdown(self, str, space_left):
+ def _cutdown(self, ucstr, space_left):
l = 0
- ucstr = unicode(str, encoding.encoding)
colwidth = unicodedata.east_asian_width
for i in xrange(len(ucstr)):
l += colwidth(ucstr[i]) in 'WFA' and 2 or 1
if space_left < l:
- return (ucstr[:i].encode(encoding.encoding),
- ucstr[i:].encode(encoding.encoding))
- return str, ''
+ return (ucstr[:i], ucstr[i:])
+ return ucstr, ''
# overriding of base class
def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
@@ -1202,10 +1200,13 @@
if width <= maxindent:
# adjust for weird terminal size
width = max(78, maxindent + 1)
+ line = line.decode(encoding.encoding, encoding.encodingmode)
+ initindent = initindent.decode(encoding.encoding, encoding.encodingmode)
+ hangindent = hangindent.decode(encoding.encoding, encoding.encodingmode)
wrapper = MBTextWrapper(width=width,
initial_indent=initindent,
subsequent_indent=hangindent)
- return wrapper.fill(line)
+ return wrapper.fill(line).encode(encoding.encoding)
def iterlines(iterator):
for chunk in iterator:
--- a/tests/test-encoding-align.t Mon Aug 08 11:34:52 2011 +0100
+++ b/tests/test-encoding-align.t Sat Aug 06 23:52:20 2011 +0200
@@ -22,14 +22,14 @@
> cmdtable = {
> 'showoptlist':
> (showoptlist,
- > [('s', 'opt1', '', 'short width', '""" + s + """'),
- > ('m', 'opt2', '', 'middle width', '""" + m + """'),
- > ('l', 'opt3', '', 'long width', '""" + l + """')
+ > [('s', 'opt1', '', 'short width' + ' %(s)s' * 8, '%(s)s'),
+ > ('m', 'opt2', '', 'middle width' + ' %(m)s' * 8, '%(m)s'),
+ > ('l', 'opt3', '', 'long width' + ' %(l)s' * 8, '%(l)s')
> ],
> ""
> )
> }
- > """)
+ > """ % globals())
> f.close()
> EOF
$ S=`cat s`
@@ -52,9 +52,11 @@
options:
- -s --opt1 \xe7\x9f\xad\xe5\x90\x8d short width (esc)
- -m --opt2 MIDDLE_ middle width
- -l --opt3 \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d long width (esc)
+ -s --opt1 \xe7\x9f\xad\xe5\x90\x8d short width \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d \xe7\x9f\xad\xe5\x90\x8d (esc)
+ -m --opt2 MIDDLE_ middle width MIDDLE_ MIDDLE_ MIDDLE_ MIDDLE_ MIDDLE_
+ MIDDLE_ MIDDLE_ MIDDLE_
+ -l --opt3 \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d long width \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d (esc)
+ \xe9\x95\xb7\xe3\x81\x84\xe9\x95\xb7\xe3\x81\x84\xe5\x90\x8d\xe5\x89\x8d (esc)
use "hg -v help showoptlist" to show global options