comparison mercurial/encoding.py @ 48892:fa2b1a46d92e

encoding: remove Python 2 support code

Differential Revision: https://phab.mercurial-scm.org/D12295
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 03 Mar 2022 07:58:29 -0800
parents 6000f5b25c9b
children 642e31cb55f0
comparison
equal deleted inserted replaced
48891:4eae533354ae 48892:fa2b1a46d92e
44 asciiupper = charencode.asciiupper 44 asciiupper = charencode.asciiupper
45 _jsonescapeu8fast = charencode.jsonescapeu8fast 45 _jsonescapeu8fast = charencode.jsonescapeu8fast
46 46
47 _sysstr = pycompat.sysstr 47 _sysstr = pycompat.sysstr
48 48
49 if pycompat.ispy3: 49 unichr = chr
50 unichr = chr
51 50
52 # These unicode characters are ignored by HFS+ (Apple Technote 1150, 51 # These unicode characters are ignored by HFS+ (Apple Technote 1150,
53 # "Unicode Subtleties"), so we need to ignore them in some places for 52 # "Unicode Subtleties"), so we need to ignore them in some places for
54 # sanity. 53 # sanity.
55 _ignore = [ 54 _ignore = [
76 return s 75 return s
77 76
78 77
79 # encoding.environ is provided read-only, which may not be used to modify 78 # encoding.environ is provided read-only, which may not be used to modify
80 # the process environment 79 # the process environment
81 _nativeenviron = not pycompat.ispy3 or os.supports_bytes_environ 80 _nativeenviron = os.supports_bytes_environ
82 if not pycompat.ispy3: 81 if _nativeenviron:
83 environ = os.environ # re-exports
84 elif _nativeenviron:
85 environ = os.environb # re-exports 82 environ = os.environb # re-exports
86 else: 83 else:
87 # preferred encoding isn't known yet; use utf-8 to avoid unicode error 84 # preferred encoding isn't known yet; use utf-8 to avoid unicode error
88 # and recreate it once encoding is settled 85 # and recreate it once encoding is settled
89 environ = { 86 environ = {
96 b'ANSI_X3.4-1968': b'ascii', 93 b'ANSI_X3.4-1968': b'ascii',
97 } 94 }
98 # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2. 95 # cp65001 is a Windows variant of utf-8, which isn't supported on Python 2.
99 # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3. 96 # No idea if it should be rewritten to the canonical name 'utf-8' on Python 3.
100 # https://bugs.python.org/issue13216 97 # https://bugs.python.org/issue13216
101 if pycompat.iswindows and not pycompat.ispy3: 98 if pycompat.iswindows:
102 _encodingrewrites[b'cp65001'] = b'utf-8' 99 _encodingrewrites[b'cp65001'] = b'utf-8'
103 100
104 try: 101 try:
105 encoding = environ.get(b"HGENCODING") 102 encoding = environ.get(b"HGENCODING")
106 if not encoding: 103 if not encoding:
268 265
269 266
270 # converter functions between native str and byte string. use these if the 267 # converter functions between native str and byte string. use these if the
271 # character encoding is not aware (e.g. exception message) or is known to 268 # character encoding is not aware (e.g. exception message) or is known to
272 # be locale dependent (e.g. date formatting.) 269 # be locale dependent (e.g. date formatting.)
273 if pycompat.ispy3: 270 strtolocal = unitolocal
274 strtolocal = unitolocal 271 strfromlocal = unifromlocal
275 strfromlocal = unifromlocal 272 strmethod = unimethod
276 strmethod = unimethod
277 else:
278
279 def strtolocal(s):
280 # type: (str) -> bytes
281 return s # pytype: disable=bad-return-type
282
283 def strfromlocal(s):
284 # type: (bytes) -> str
285 return s # pytype: disable=bad-return-type
286
287 strmethod = pycompat.identity
288 273
289 274
290 def lower(s): 275 def lower(s):
291 # type: (bytes) -> bytes 276 # type: (bytes) -> bytes
292 """best-effort encoding-aware case-folding of local string s""" 277 """best-effort encoding-aware case-folding of local string s"""
342 327
343 328
344 if not _nativeenviron: 329 if not _nativeenviron:
345 # now encoding and helper functions are available, recreate the environ 330 # now encoding and helper functions are available, recreate the environ
346 # dict to be exported to other modules 331 # dict to be exported to other modules
347 if pycompat.iswindows and pycompat.ispy3: 332 if pycompat.iswindows:
348 333
349 class WindowsEnviron(dict): 334 class WindowsEnviron(dict):
350 """`os.environ` normalizes environment variables to uppercase on windows""" 335 """`os.environ` normalizes environment variables to uppercase on windows"""
351 336
352 def get(self, key, default=None): 337 def get(self, key, default=None):
358 environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8')) 343 environ[tolocal(k.encode('utf-8'))] = tolocal(v.encode('utf-8'))
359 344
360 345
361 DRIVE_RE = re.compile(b'^[a-z]:') 346 DRIVE_RE = re.compile(b'^[a-z]:')
362 347
363 if pycompat.ispy3: 348 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which
364 # os.getcwd() on Python 3 returns string, but it has os.getcwdb() which 349 # returns bytes.
365 # returns bytes. 350 if pycompat.iswindows:
366 if pycompat.iswindows: 351 # Python 3 on Windows issues a DeprecationWarning about using the bytes
367 # Python 3 on Windows issues a DeprecationWarning about using the bytes 352 # API when os.getcwdb() is called.
368 # API when os.getcwdb() is called. 353 #
369 # 354 # Additionally, py3.8+ uppercases the drive letter when calling
370 # Additionally, py3.8+ uppercases the drive letter when calling 355 # os.path.realpath(), which is used on ``repo.root``. Since those
371 # os.path.realpath(), which is used on ``repo.root``. Since those 356 # strings are compared in various places as simple strings, also call
372 # strings are compared in various places as simple strings, also call 357 # realpath here. See https://bugs.python.org/issue40368
373 # realpath here. See https://bugs.python.org/issue40368 358 #
374 # 359 # However this is not reliable, so lets explicitly make this drive
375 # However this is not reliable, so lets explicitly make this drive 360 # letter upper case.
376 # letter upper case. 361 #
377 # 362 # note: we should consider dropping realpath here since it seems to
378 # note: we should consider dropping realpath here since it seems to 363 # change the semantic of `getcwd`.
379 # change the semantic of `getcwd`. 364
380 365 def getcwd():
381 def getcwd(): 366 cwd = os.getcwd() # re-exports
382 cwd = os.getcwd() # re-exports 367 cwd = os.path.realpath(cwd)
383 cwd = os.path.realpath(cwd) 368 cwd = strtolocal(cwd)
384 cwd = strtolocal(cwd) 369 if DRIVE_RE.match(cwd):
385 if DRIVE_RE.match(cwd): 370 cwd = cwd[0:1].upper() + cwd[1:]
386 cwd = cwd[0:1].upper() + cwd[1:] 371 return cwd
387 return cwd 372
388 373
389 else:
390 getcwd = os.getcwdb # re-exports
391 else: 374 else:
392 getcwd = os.getcwd # re-exports 375 getcwd = os.getcwdb # re-exports
393 376
394 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide. 377 # How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
395 _wide = _sysstr( 378 _wide = _sysstr(
396 environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide" 379 environ.get(b"HGENCODINGAMBIGUOUS", b"narrow") == b"wide"
397 and b"WFA" 380 and b"WFA"
598 return charencodepure.jsonescapeu8fallback(u8chars, paranoid) 581 return charencodepure.jsonescapeu8fallback(u8chars, paranoid)
599 582
600 583
601 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8 584 # We need to decode/encode U+DCxx codes transparently since invalid UTF-8
602 # bytes are mapped to that range. 585 # bytes are mapped to that range.
603 if pycompat.ispy3: 586 _utf8strict = r'surrogatepass'
604 _utf8strict = r'surrogatepass'
605 else:
606 _utf8strict = r'strict'
607 587
608 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4] 588 _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
609 589
610 590
611 def getutf8char(s, pos): 591 def getutf8char(s, pos):