typing: add type hints to the `charencode` module
Since this module is dynamically imported from either `mercurial.pure` or
`mercurial.cext`, these hints aren't detected in `mercurial.encoding`, and need
to be imported directly there during the type-checking phase. This keeps the
runtime selection via the policy config in place, but allows pytype to see these
as functions with proper signatures instead of just `Any`. We don't attempt to
import the `mercurial.cext` version yet because there's no types stubs for that
module, but this will get the ball rolling.
I thought this would spill over into other modules from there, but the only two
*.pyi files that changed were for `encoding` and `charencode`. Applying this to
other dynamically selected modules will clean some things up in other files, so
this is a start. I had originally redefined the functions in the type-checking
block (like some of the `os.path` aliasing in `mercurial.util`), but this is
better because we won't have another duplication of the definitions that may get
out of date.
# charencode.py - miscellaneous character encoding
#
# Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
import array
from .. import pycompat
def isasciistr(s: bytes) -> bool:
try:
s.decode('ascii')
return True
except UnicodeDecodeError:
return False
def asciilower(s: bytes) -> bytes:
"""convert a string to lowercase if ASCII
Raises UnicodeDecodeError if non-ASCII characters are found."""
s.decode('ascii')
return s.lower()
def asciiupper(s: bytes) -> bytes:
"""convert a string to uppercase if ASCII
Raises UnicodeDecodeError if non-ASCII characters are found."""
s.decode('ascii')
return s.upper()
_jsonmap = []
_jsonmap.extend(b"\\u%04x" % x for x in range(32))
_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
_jsonmap.append(b'\\u007f')
_jsonmap[0x09] = b'\\t'
_jsonmap[0x0A] = b'\\n'
_jsonmap[0x22] = b'\\"'
_jsonmap[0x5C] = b'\\\\'
_jsonmap[0x08] = b'\\b'
_jsonmap[0x0C] = b'\\f'
_jsonmap[0x0D] = b'\\r'
_paranoidjsonmap = _jsonmap[:]
_paranoidjsonmap[0x3C] = b'\\u003c' # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3E] = b'\\u003e' # '>'
_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
def jsonescapeu8fast(u8chars: bytes, paranoid: bool) -> bytes:
"""Convert a UTF-8 byte string to JSON-escaped form (fast path)
Raises ValueError if non-ASCII characters have to be escaped.
"""
if paranoid:
jm = _paranoidjsonmap
else:
jm = _jsonmap
try:
return b''.join(jm[x] for x in bytearray(u8chars))
except IndexError:
raise ValueError
_utf8strict = r'surrogatepass'
def jsonescapeu8fallback(u8chars: bytes, paranoid: bool) -> bytes:
"""Convert a UTF-8 byte string to JSON-escaped form (slow path)
Escapes all non-ASCII characters no matter if paranoid is False.
"""
if paranoid:
jm = _paranoidjsonmap
else:
jm = _jsonmap
# non-BMP char is represented as UTF-16 surrogate pair
u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict)
u16codes = array.array('H', u16b)
u16codes.pop(0) # drop BOM
return b''.join(jm[x] if x < 128 else b'\\u%04x' % x for x in u16codes)