comparison mercurial/encoding.py @ 41836:25694a78e4a4

encoding: use raw strings for encoding arguments. This prevents the internals of Python from coercing a unicode to str on Python 2 and makes tests run with HGUNICODEPEDANTRY=1 a lot happier. Differential Revision: https://phab.mercurial-scm.org/D6051
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 02 Mar 2019 13:07:58 -0800
parents 9e8fcd2e78c1
children 2372284d9457
comparison
equal deleted inserted replaced
41835:ddb174511f1b 41836:25694a78e4a4
63 elif _nativeenviron: 63 elif _nativeenviron:
64 environ = os.environb # re-exports 64 environ = os.environb # re-exports
65 else: 65 else:
66 # preferred encoding isn't known yet; use utf-8 to avoid unicode error 66 # preferred encoding isn't known yet; use utf-8 to avoid unicode error
67 # and recreate it once encoding is settled 67 # and recreate it once encoding is settled
68 environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8')) 68 environ = dict((k.encode(r'utf-8'), v.encode(r'utf-8'))
69 for k, v in os.environ.items()) # re-exports 69 for k, v in os.environ.items()) # re-exports
70 70
71 _encodingrewrites = { 71 _encodingrewrites = {
72 '646': 'ascii', 72 '646': 'ascii',
73 'ANSI_X3.4-1968': 'ascii', 73 'ANSI_X3.4-1968': 'ascii',
150 # make sure string is actually stored in UTF-8 150 # make sure string is actually stored in UTF-8
151 u = s.decode('UTF-8') 151 u = s.decode('UTF-8')
152 if encoding == 'UTF-8': 152 if encoding == 'UTF-8':
153 # fast path 153 # fast path
154 return s 154 return s
155 r = u.encode(_sysstr(encoding), u"replace") 155 r = u.encode(_sysstr(encoding), r"replace")
156 if u == r.decode(_sysstr(encoding)): 156 if u == r.decode(_sysstr(encoding)):
157 # r is a safe, non-lossy encoding of s 157 # r is a safe, non-lossy encoding of s
158 return safelocalstr(r) 158 return safelocalstr(r)
159 return localstr(s, r) 159 return localstr(s, r)
160 except UnicodeDecodeError: 160 except UnicodeDecodeError:
161 # we should only get here if we're looking at an ancient changeset 161 # we should only get here if we're looking at an ancient changeset
162 try: 162 try:
163 u = s.decode(_sysstr(fallbackencoding)) 163 u = s.decode(_sysstr(fallbackencoding))
164 r = u.encode(_sysstr(encoding), u"replace") 164 r = u.encode(_sysstr(encoding), r"replace")
165 if u == r.decode(_sysstr(encoding)): 165 if u == r.decode(_sysstr(encoding)):
166 # r is a safe, non-lossy encoding of s 166 # r is a safe, non-lossy encoding of s
167 return safelocalstr(r) 167 return safelocalstr(r)
168 return localstr(u.encode('UTF-8'), r) 168 return localstr(u.encode('UTF-8'), r)
169 except UnicodeDecodeError: 169 except UnicodeDecodeError:
170 u = s.decode("utf-8", "replace") # last ditch 170 u = s.decode("utf-8", "replace") # last ditch
171 # can't round-trip 171 # can't round-trip
172 return u.encode(_sysstr(encoding), u"replace") 172 return u.encode(_sysstr(encoding), r"replace")
173 except LookupError as k: 173 except LookupError as k:
174 raise error.Abort(k, hint="please check your locale settings") 174 raise error.Abort(k, hint="please check your locale settings")
175 175
176 def fromlocal(s): 176 def fromlocal(s):
177 """ 177 """
228 strmethod = pycompat.identity 228 strmethod = pycompat.identity
229 229
230 if not _nativeenviron: 230 if not _nativeenviron:
231 # now encoding and helper functions are available, recreate the environ 231 # now encoding and helper functions are available, recreate the environ
232 # dict to be exported to other modules 232 # dict to be exported to other modules
233 environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8'))) 233 environ = dict((tolocal(k.encode(r'utf-8')), tolocal(v.encode(r'utf-8')))
234 for k, v in os.environ.items()) # re-exports 234 for k, v in os.environ.items()) # re-exports
235 235
236 if pycompat.ispy3: 236 if pycompat.ispy3:
237 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which 237 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
238 # returns bytes. 238 # returns bytes.
249 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide" 249 _wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
250 and "WFA" or "WF") 250 and "WFA" or "WF")
251 251
252 def colwidth(s): 252 def colwidth(s):
253 "Find the column width of a string for display in the local encoding" 253 "Find the column width of a string for display in the local encoding"
254 return ucolwidth(s.decode(_sysstr(encoding), u'replace')) 254 return ucolwidth(s.decode(_sysstr(encoding), r'replace'))
255 255
256 def ucolwidth(d): 256 def ucolwidth(d):
257 "Find the column width of a Unicode string for display" 257 "Find the column width of a Unicode string for display"
258 eaw = getattr(unicodedata, 'east_asian_width', None) 258 eaw = getattr(unicodedata, 'east_asian_width', None)
259 if eaw is not None: 259 if eaw is not None: