Mercurial > hg
comparison mercurial/encoding.py @ 21856:d24969ee272f
encoding: add 'trim' to trim multi-byte characters at most specified columns
The newly added 'trim' correctly trims strings containing multi-byte
characters to at most the specified number of columns. Direct slicing of
a byte sequence should be replaced with 'encoding.trim', because slicing
may split the string in the middle of a multi-byte sequence.
Slicing of the unicode sequence ('uslice') and concatenation with the
ellipsis ('concat') are defined as functions, to make enhancements in
subsequent patches easier.
author | FUJIWARA Katsunori <foozy@lares.dti.ne.jp> |
---|---|
date | Sun, 06 Jul 2014 02:56:41 +0900 |
parents | 404feac78b8a |
children | b515c3a63e96 |
comparison
equal
deleted
inserted
replaced
21854:ba3bc6474bbf | 21856:d24969ee272f |
---|---|
163 for x in xrange(start + c, len(s)): | 163 for x in xrange(start + c, len(s)): |
164 t = s[start:x] | 164 t = s[start:x] |
165 if colwidth(t) == c: | 165 if colwidth(t) == c: |
166 return t | 166 return t |
167 | 167 |
def trim(s, width, ellipsis=''):
    """Trim string 's' to at most 'width' columns (including 'ellipsis').

    's' is decoded with the module-level 'encoding' and measured with
    'ucolwidth'. If it already fits in 'width' columns it is returned
    unchanged. Otherwise trailing characters are dropped until the
    remainder plus 'ellipsis' fits, so a multi-byte character is never
    split.

    If 's' cannot be decoded, it is trimmed byte-wise instead (each byte
    is counted as one column).

    The room taken by 'ellipsis' is counted as len(ellipsis). If there is
    no room for any text, as much of 'ellipsis' as fits in 'width' is
    returned; a zero or negative 'width' yields an empty string.

    >>> ellipsis = '+++'
    >>> from mercurial import encoding
    >>> encoding.encoding = 'utf-8'
    >>> t= '1234567890'
    >>> print trim(t, 12, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 10, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 8, ellipsis=ellipsis)
    12345+++
    >>> print trim(t, 8)
    12345678
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    >>> trim(t, -1, ellipsis=ellipsis)
    ''
    >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
    >>> t = u.encode(encoding.encoding)
    >>> print trim(t, 12, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 10, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 8, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84+++
    >>> print trim(t, 5)
    \xe3\x81\x82\xe3\x81\x84
    >>> print trim(t, 4, ellipsis=ellipsis)
    +++
    >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
    >>> print trim(t, 12, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 10, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 8, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55+++
    >>> print trim(t, 8)
    \x11\x22\x33\x44\x55\x66\x77\x88
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    """
    # Columns left for text from 's' once the ellipsis has been placed.
    room = width - len(ellipsis)
    # How much of the ellipsis may be emitted when no text fits. Clamped
    # at 0 because a negative slice bound would count from the end of
    # 'ellipsis' and return more than 'width' allows.
    ellipsiscols = max(width, 0)

    try:
        u = s.decode(encoding)
    except UnicodeDecodeError:
        # Undecodable input: fall back to byte-wise trimming.
        if len(s) <= width: # trimming is not needed
            return s
        if room <= 0: # no enough room even for ellipsis
            return ellipsis[:ellipsiscols]
        return s[:room] + ellipsis

    if ucolwidth(u) <= width: # trimming is not needed
        return s

    if room <= 0: # no enough room even for ellipsis
        return ellipsis[:ellipsiscols]

    # Kept as separate callables so that slicing and concatenation can be
    # changed independently of the search loop below.
    uslice = lambda i: u[:-i]
    concat = lambda s: s + ellipsis
    # Drop one more trailing character each round; the first candidate
    # that fits is the longest one.
    for i in xrange(1, len(u)):
        usub = uslice(i)
        if ucolwidth(usub) <= room:
            return concat(usub.encode(encoding))
    return ellipsis # no enough room for multi-column characters
237 | |
168 def lower(s): | 238 def lower(s): |
169 "best-effort encoding-aware case-folding of local string s" | 239 "best-effort encoding-aware case-folding of local string s" |
170 try: | 240 try: |
171 s.decode('ascii') # throw exception for non-ASCII character | 241 s.decode('ascii') # throw exception for non-ASCII character |
172 return s.lower() | 242 return s.lower() |