Mercurial > hg
changeset 12957:9f2ac318b92e
util: clarify purpose of MBTextWrapper class
It's easy to get confused and scared of an Unicode monster when
skimming through this code: document that this is really just
about column-counting.
author | Nicolas Dumazet <nicdumz.commits@gmail.com> |
---|---|
date | Tue, 09 Nov 2010 13:43:35 +0900 |
parents | 0406682c6905 |
children | 8957c39867f6 |
files | mercurial/util.py |
diffstat | 1 files changed, 13 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/util.py Mon Nov 08 17:29:23 2010 -0600 +++ b/mercurial/util.py Tue Nov 09 13:43:35 2010 +0900 @@ -1328,15 +1328,26 @@ #### naming convention of below implementation follows 'textwrap' module class MBTextWrapper(textwrap.TextWrapper): + """ + Extend TextWrapper for double-width characters. + + Some Asian characters use two terminal columns instead of one. + A good example of this behavior can be seen with u'\u65e5\u672c', + the two Japanese characters for "Japan": + len() returns 2, but when printed to a terminal, they eat 4 columns. + + (Note that this has nothing to do whatsoever with unicode + representation, or encoding of the underlying string) + """ def __init__(self, **kwargs): textwrap.TextWrapper.__init__(self, **kwargs) def _cutdown(self, str, space_left): l = 0 ucstr = unicode(str, encoding.encoding) - w = unicodedata.east_asian_width + colwidth = unicodedata.east_asian_width for i in xrange(len(ucstr)): - l += w(ucstr[i]) in 'WFA' and 2 or 1 + l += colwidth(ucstr[i]) in 'WFA' and 2 or 1 if space_left < l: return (ucstr[:i].encode(encoding.encoding), ucstr[i:].encode(encoding.encoding))