--- a/mercurial/pycompat.py Tue Mar 01 20:29:03 2022 -0800
+++ b/mercurial/pycompat.py Thu Mar 03 07:59:42 2022 -0800
@@ -11,21 +11,26 @@
from __future__ import absolute_import
import builtins
+import codecs
import concurrent.futures as futures
+import functools
import getopt
import http.client as httplib
import http.cookiejar as cookielib
import inspect
+import io
import json
import os
import pickle
import queue
import shlex
import socketserver
+import struct
import sys
import tempfile
import xmlrpc.client as xmlrpclib
+
ispy3 = sys.version_info[0] >= 3
ispypy = '__pypy__' in sys.builtin_module_names
TYPE_CHECKING = False
@@ -82,401 +87,340 @@
return _rapply(f, xs)
-if ispy3:
- import builtins
- import codecs
- import functools
- import io
- import struct
-
- if os.name == r'nt' and sys.version_info >= (3, 6):
- # MBCS (or ANSI) filesystem encoding must be used as before.
- # Otherwise non-ASCII filenames in existing repositories would be
- # corrupted.
- # This must be set once prior to any fsencode/fsdecode calls.
- sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
+if os.name == r'nt' and sys.version_info >= (3, 6):
+ # MBCS (or ANSI) filesystem encoding must be used as before.
+ # Otherwise non-ASCII filenames in existing repositories would be
+ # corrupted.
+ # This must be set once prior to any fsencode/fsdecode calls.
+ sys._enablelegacywindowsfsencoding() # pytype: disable=module-attr
- fsencode = os.fsencode
- fsdecode = os.fsdecode
- oscurdir = os.curdir.encode('ascii')
- oslinesep = os.linesep.encode('ascii')
- osname = os.name.encode('ascii')
- ospathsep = os.pathsep.encode('ascii')
- ospardir = os.pardir.encode('ascii')
- ossep = os.sep.encode('ascii')
- osaltsep = os.altsep
- if osaltsep:
- osaltsep = osaltsep.encode('ascii')
- osdevnull = os.devnull.encode('ascii')
+fsencode = os.fsencode
+fsdecode = os.fsdecode
+oscurdir = os.curdir.encode('ascii')
+oslinesep = os.linesep.encode('ascii')
+osname = os.name.encode('ascii')
+ospathsep = os.pathsep.encode('ascii')
+ospardir = os.pardir.encode('ascii')
+ossep = os.sep.encode('ascii')
+osaltsep = os.altsep
+if osaltsep:
+ osaltsep = osaltsep.encode('ascii')
+osdevnull = os.devnull.encode('ascii')
- sysplatform = sys.platform.encode('ascii')
- sysexecutable = sys.executable
- if sysexecutable:
- sysexecutable = os.fsencode(sysexecutable)
- bytesio = io.BytesIO
- # TODO deprecate stringio name, as it is a lie on Python 3.
- stringio = bytesio
+sysplatform = sys.platform.encode('ascii')
+sysexecutable = sys.executable
+if sysexecutable:
+ sysexecutable = os.fsencode(sysexecutable)
+bytesio = io.BytesIO
+# TODO deprecate stringio name, as it is a lie on Python 3.
+stringio = bytesio
- def maplist(*args):
- return list(map(*args))
+
+def maplist(*args):
+ return list(map(*args))
- def rangelist(*args):
- return list(range(*args))
+
+def rangelist(*args):
+ return list(range(*args))
- def ziplist(*args):
- return list(zip(*args))
+
+def ziplist(*args):
+ return list(zip(*args))
+
- rawinput = input
- getargspec = inspect.getfullargspec
+rawinput = input
+getargspec = inspect.getfullargspec
- long = int
+long = int
- if getattr(sys, 'argv', None) is not None:
- # On POSIX, the char** argv array is converted to Python str using
- # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
- # isn't directly callable from Python code. In practice, os.fsencode()
- # can be used instead (this is recommended by Python's documentation
- # for sys.argv).
- #
- # On Windows, the wchar_t **argv is passed into the interpreter as-is.
- # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
- # there's an additional wrinkle. What we really want to access is the
- # ANSI codepage representation of the arguments, as this is what
- # `int main()` would receive if Python 3 didn't define `int wmain()`
- # (this is how Python 2 worked). To get that, we encode with the mbcs
- # encoding, which will pass CP_ACP to the underlying Windows API to
- # produce bytes.
- if os.name == r'nt':
- sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
- else:
- sysargv = [fsencode(a) for a in sys.argv]
+if getattr(sys, 'argv', None) is not None:
+ # On POSIX, the char** argv array is converted to Python str using
+ # Py_DecodeLocale(). The inverse of this is Py_EncodeLocale(), which
+ # isn't directly callable from Python code. In practice, os.fsencode()
+ # can be used instead (this is recommended by Python's documentation
+ # for sys.argv).
+ #
+ # On Windows, the wchar_t **argv is passed into the interpreter as-is.
+ # Like POSIX, we need to emulate what Py_EncodeLocale() would do. But
+ # there's an additional wrinkle. What we really want to access is the
+ # ANSI codepage representation of the arguments, as this is what
+ # `int main()` would receive if Python 3 didn't define `int wmain()`
+ # (this is how Python 2 worked). To get that, we encode with the mbcs
+ # encoding, which will pass CP_ACP to the underlying Windows API to
+ # produce bytes.
+ if os.name == r'nt':
+ sysargv = [a.encode("mbcs", "ignore") for a in sys.argv]
+ else:
+ sysargv = [fsencode(a) for a in sys.argv]
- bytechr = struct.Struct('>B').pack
- byterepr = b'%r'.__mod__
-
- class bytestr(bytes):
- """A bytes which mostly acts as a Python 2 str
+bytechr = struct.Struct('>B').pack
+byterepr = b'%r'.__mod__
- >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
- ('', 'foo', 'ascii', '1')
- >>> s = bytestr(b'foo')
- >>> assert s is bytestr(s)
- __bytes__() should be called if provided:
+class bytestr(bytes):
+ """A bytes which mostly acts as a Python 2 str
- >>> class bytesable(object):
- ... def __bytes__(self):
- ... return b'bytes'
- >>> bytestr(bytesable())
- 'bytes'
+ >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
+ ('', 'foo', 'ascii', '1')
+ >>> s = bytestr(b'foo')
+ >>> assert s is bytestr(s)
+
+ __bytes__() should be called if provided:
- There's no implicit conversion from non-ascii str as its encoding is
- unknown:
+ >>> class bytesable(object):
+ ... def __bytes__(self):
+ ... return b'bytes'
+ >>> bytestr(bytesable())
+ 'bytes'
- >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
- Traceback (most recent call last):
- ...
- UnicodeEncodeError: ...
-
- Comparison between bytestr and bytes should work:
+ There's no implicit conversion from non-ascii str as its encoding is
+ unknown:
- >>> assert bytestr(b'foo') == b'foo'
- >>> assert b'foo' == bytestr(b'foo')
- >>> assert b'f' in bytestr(b'foo')
- >>> assert bytestr(b'f') in b'foo'
+ >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
+ Traceback (most recent call last):
+ ...
+ UnicodeEncodeError: ...
- Sliced elements should be bytes, not integer:
+ Comparison between bytestr and bytes should work:
- >>> s[1], s[:2]
- (b'o', b'fo')
- >>> list(s), list(reversed(s))
- ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
-
- As bytestr type isn't propagated across operations, you need to cast
- bytes to bytestr explicitly:
+ >>> assert bytestr(b'foo') == b'foo'
+ >>> assert b'foo' == bytestr(b'foo')
+ >>> assert b'f' in bytestr(b'foo')
+ >>> assert bytestr(b'f') in b'foo'
- >>> s = bytestr(b'foo').upper()
- >>> t = bytestr(s)
- >>> s[0], t[0]
- (70, b'F')
+ Sliced elements should be bytes, not integer:
- Be careful to not pass a bytestr object to a function which expects
- bytearray-like behavior.
+ >>> s[1], s[:2]
+ (b'o', b'fo')
+ >>> list(s), list(reversed(s))
+ ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
- >>> t = bytes(t) # cast to bytes
- >>> assert type(t) is bytes
- """
+ As bytestr type isn't propagated across operations, you need to cast
+ bytes to bytestr explicitly:
- # Trick pytype into not demanding Iterable[int] be passed to __new__(),
- # since the appropriate bytes format is done internally.
- #
- # https://github.com/google/pytype/issues/500
- if TYPE_CHECKING:
+ >>> s = bytestr(b'foo').upper()
+ >>> t = bytestr(s)
+ >>> s[0], t[0]
+ (70, b'F')
- def __init__(self, s=b''):
- pass
+ Be careful to not pass a bytestr object to a function which expects
+ bytearray-like behavior.
+
+ >>> t = bytes(t) # cast to bytes
+ >>> assert type(t) is bytes
+ """
- def __new__(cls, s=b''):
- if isinstance(s, bytestr):
- return s
- if not isinstance(
- s, (bytes, bytearray)
- ) and not hasattr( # hasattr-py3-only
- s, u'__bytes__'
- ):
- s = str(s).encode('ascii')
- return bytes.__new__(cls, s)
+ # Trick pytype into not demanding Iterable[int] be passed to __new__(),
+ # since the appropriate bytes format is done internally.
+ #
+ # https://github.com/google/pytype/issues/500
+ if TYPE_CHECKING:
- def __getitem__(self, key):
- s = bytes.__getitem__(self, key)
- if not isinstance(s, bytes):
- s = bytechr(s)
+ def __init__(self, s=b''):
+ pass
+
+ def __new__(cls, s=b''):
+ if isinstance(s, bytestr):
return s
-
- def __iter__(self):
- return iterbytestr(bytes.__iter__(self))
-
- def __repr__(self):
- return bytes.__repr__(self)[1:] # drop b''
+ if not isinstance(
+ s, (bytes, bytearray)
+ ) and not hasattr( # hasattr-py3-only
+ s, u'__bytes__'
+ ):
+ s = str(s).encode('ascii')
+ return bytes.__new__(cls, s)
- def iterbytestr(s):
- """Iterate bytes as if it were a str object of Python 2"""
- return map(bytechr, s)
-
- def maybebytestr(s):
- """Promote bytes to bytestr"""
- if isinstance(s, bytes):
- return bytestr(s)
+ def __getitem__(self, key):
+ s = bytes.__getitem__(self, key)
+ if not isinstance(s, bytes):
+ s = bytechr(s)
return s
- def sysbytes(s):
- """Convert an internal str (e.g. keyword, __doc__) back to bytes
+ def __iter__(self):
+ return iterbytestr(bytes.__iter__(self))
+
+ def __repr__(self):
+ return bytes.__repr__(self)[1:] # drop b''
+
- This never raises UnicodeEncodeError, but only ASCII characters
- can be round-trip by sysstr(sysbytes(s)).
- """
- if isinstance(s, bytes):
- return s
- return s.encode('utf-8')
+def iterbytestr(s):
+ """Iterate bytes as if it were a str object of Python 2"""
+ return map(bytechr, s)
+
- def sysstr(s):
- """Return a keyword str to be passed to Python functions such as
- getattr() and str.encode()
+def maybebytestr(s):
+ """Promote bytes to bytestr"""
+ if isinstance(s, bytes):
+ return bytestr(s)
+ return s
+
- This never raises UnicodeDecodeError. Non-ascii characters are
- considered invalid and mapped to arbitrary but unique code points
- such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
- """
- if isinstance(s, builtins.str):
- return s
- return s.decode('latin-1')
+def sysbytes(s):
+ """Convert an internal str (e.g. keyword, __doc__) back to bytes
+
+ This never raises UnicodeEncodeError, but only ASCII characters
+ can be round-tripped by sysstr(sysbytes(s)).
+ """
+ if isinstance(s, bytes):
+ return s
+ return s.encode('utf-8')
+
- def strurl(url):
- """Converts a bytes url back to str"""
- if isinstance(url, bytes):
- return url.decode('ascii')
- return url
+def sysstr(s):
+ """Return a keyword str to be passed to Python functions such as
+ getattr() and str.encode()
+
+ This never raises UnicodeDecodeError. Non-ascii characters are
+ considered invalid and mapped to arbitrary but unique code points
+ such that 'sysstr(a) != sysstr(b)' for all 'a != b'.
+ """
+ if isinstance(s, builtins.str):
+ return s
+ return s.decode('latin-1')
+
- def bytesurl(url):
- """Converts a str url to bytes by encoding in ascii"""
- if isinstance(url, str):
- return url.encode('ascii')
- return url
+def strurl(url):
+ """Converts a bytes url back to str"""
+ if isinstance(url, bytes):
+ return url.decode('ascii')
+ return url
+
- def raisewithtb(exc, tb):
- """Raise exception with the given traceback"""
- raise exc.with_traceback(tb)
+def bytesurl(url):
+ """Converts a str url to bytes by encoding in ascii"""
+ if isinstance(url, str):
+ return url.encode('ascii')
+ return url
- def getdoc(obj):
- """Get docstring as bytes; may be None so gettext() won't confuse it
- with _('')"""
- doc = getattr(obj, '__doc__', None)
- if doc is None:
- return doc
- return sysbytes(doc)
+
+def raisewithtb(exc, tb):
+ """Raise exception with the given traceback"""
+ raise exc.with_traceback(tb)
+
- def _wrapattrfunc(f):
- @functools.wraps(f)
- def w(object, name, *args):
- return f(object, sysstr(name), *args)
+def getdoc(obj):
+ """Get docstring as bytes; may be None so gettext() won't confuse it
+ with _('')"""
+ doc = getattr(obj, '__doc__', None)
+ if doc is None:
+ return doc
+ return sysbytes(doc)
- return w
+
+def _wrapattrfunc(f):
+ @functools.wraps(f)
+ def w(object, name, *args):
+ return f(object, sysstr(name), *args)
- # these wrappers are automagically imported by hgloader
- delattr = _wrapattrfunc(builtins.delattr)
- getattr = _wrapattrfunc(builtins.getattr)
- hasattr = _wrapattrfunc(builtins.hasattr)
- setattr = _wrapattrfunc(builtins.setattr)
- xrange = builtins.range
- unicode = str
+ return w
+
- def open(name, mode=b'r', buffering=-1, encoding=None):
- return builtins.open(name, sysstr(mode), buffering, encoding)
+# these wrappers are automagically imported by hgloader
+delattr = _wrapattrfunc(builtins.delattr)
+getattr = _wrapattrfunc(builtins.getattr)
+hasattr = _wrapattrfunc(builtins.hasattr)
+setattr = _wrapattrfunc(builtins.setattr)
+xrange = builtins.range
+unicode = str
- safehasattr = _wrapattrfunc(builtins.hasattr)
+
+def open(name, mode=b'r', buffering=-1, encoding=None):
+ return builtins.open(name, sysstr(mode), buffering, encoding)
+
- def _getoptbwrapper(orig, args, shortlist, namelist):
- """
- Takes bytes arguments, converts them to unicode, pass them to
- getopt.getopt(), convert the returned values back to bytes and then
- return them for Python 3 compatibility as getopt.getopt() don't accepts
- bytes on Python 3.
- """
- args = [a.decode('latin-1') for a in args]
- shortlist = shortlist.decode('latin-1')
- namelist = [a.decode('latin-1') for a in namelist]
- opts, args = orig(args, shortlist, namelist)
- opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
- args = [a.encode('latin-1') for a in args]
- return opts, args
+safehasattr = _wrapattrfunc(builtins.hasattr)
+
+
+def _getoptbwrapper(orig, args, shortlist, namelist):
+ """
+ Takes bytes arguments, converts them to unicode, passes them to
+ getopt.getopt(), converts the returned values back to bytes and then
+ returns them for Python 3 compatibility, as getopt.getopt() doesn't
+ accept bytes on Python 3.
+ """
+ args = [a.decode('latin-1') for a in args]
+ shortlist = shortlist.decode('latin-1')
+ namelist = [a.decode('latin-1') for a in namelist]
+ opts, args = orig(args, shortlist, namelist)
+ opts = [(a[0].encode('latin-1'), a[1].encode('latin-1')) for a in opts]
+ args = [a.encode('latin-1') for a in args]
+ return opts, args
+
- def strkwargs(dic):
- """
- Converts the keys of a python dictonary to str i.e. unicodes so that
- they can be passed as keyword arguments as dictionaries with bytes keys
- can't be passed as keyword arguments to functions on Python 3.
- """
- dic = {k.decode('latin-1'): v for k, v in dic.items()}
- return dic
+def strkwargs(dic):
+ """
+ Converts the keys of a Python dictionary to str (i.e. unicode) so that
+ they can be passed as keyword arguments, since dictionaries with bytes
+ keys can't be passed as keyword arguments to functions on Python 3.
+ """
+ dic = {k.decode('latin-1'): v for k, v in dic.items()}
+ return dic
- def byteskwargs(dic):
- """
- Converts keys of python dictionaries to bytes as they were converted to
- str to pass that dictonary as a keyword argument on Python 3.
- """
- dic = {k.encode('latin-1'): v for k, v in dic.items()}
- return dic
- # TODO: handle shlex.shlex().
- def shlexsplit(s, comments=False, posix=True):
- """
- Takes bytes argument, convert it to str i.e. unicodes, pass that into
- shlex.split(), convert the returned value to bytes and return that for
- Python 3 compatibility as shelx.split() don't accept bytes on Python 3.
- """
- ret = shlex.split(s.decode('latin-1'), comments, posix)
- return [a.encode('latin-1') for a in ret]
+def byteskwargs(dic):
+ """
+ Converts the keys of a Python dictionary back to bytes, as they were
+ converted to str to pass that dictionary as keyword arguments on Python 3.
+ """
+ dic = {k.encode('latin-1'): v for k, v in dic.items()}
+ return dic
+
- iteritems = lambda x: x.items()
- itervalues = lambda x: x.values()
+# TODO: handle shlex.shlex().
+def shlexsplit(s, comments=False, posix=True):
+ """
+ Takes a bytes argument, converts it to str (i.e. unicode), passes it to
+ shlex.split(), converts the returned values to bytes and returns them for
+ Python 3 compatibility, as shlex.split() doesn't accept bytes on Python 3.
+ """
+ ret = shlex.split(s.decode('latin-1'), comments, posix)
+ return [a.encode('latin-1') for a in ret]
- # Python 3.5's json.load and json.loads require str. We polyfill its
- # code for detecting encoding from bytes.
- if sys.version_info[0:2] < (3, 6):
- def _detect_encoding(b):
- bstartswith = b.startswith
- if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
- return 'utf-32'
- if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
- return 'utf-16'
- if bstartswith(codecs.BOM_UTF8):
- return 'utf-8-sig'
+iteritems = lambda x: x.items()
+itervalues = lambda x: x.values()
+
+# Python 3.5's json.load and json.loads require str. We polyfill its
+# code for detecting encoding from bytes.
+if sys.version_info[0:2] < (3, 6):
+
+ def _detect_encoding(b):
+ bstartswith = b.startswith
+ if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
+ return 'utf-32'
+ if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
+ return 'utf-16'
+ if bstartswith(codecs.BOM_UTF8):
+ return 'utf-8-sig'
- if len(b) >= 4:
- if not b[0]:
- # 00 00 -- -- - utf-32-be
- # 00 XX -- -- - utf-16-be
- return 'utf-16-be' if b[1] else 'utf-32-be'
- if not b[1]:
- # XX 00 00 00 - utf-32-le
- # XX 00 00 XX - utf-16-le
- # XX 00 XX -- - utf-16-le
- return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
- elif len(b) == 2:
- if not b[0]:
- # 00 XX - utf-16-be
- return 'utf-16-be'
- if not b[1]:
- # XX 00 - utf-16-le
- return 'utf-16-le'
- # default
- return 'utf-8'
+ if len(b) >= 4:
+ if not b[0]:
+ # 00 00 -- -- - utf-32-be
+ # 00 XX -- -- - utf-16-be
+ return 'utf-16-be' if b[1] else 'utf-32-be'
+ if not b[1]:
+ # XX 00 00 00 - utf-32-le
+ # XX 00 00 XX - utf-16-le
+ # XX 00 XX -- - utf-16-le
+ return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+ elif len(b) == 2:
+ if not b[0]:
+ # 00 XX - utf-16-be
+ return 'utf-16-be'
+ if not b[1]:
+ # XX 00 - utf-16-le
+ return 'utf-16-le'
+ # default
+ return 'utf-8'
- def json_loads(s, *args, **kwargs):
- if isinstance(s, (bytes, bytearray)):
- s = s.decode(_detect_encoding(s), 'surrogatepass')
+ def json_loads(s, *args, **kwargs):
+ if isinstance(s, (bytes, bytearray)):
+ s = s.decode(_detect_encoding(s), 'surrogatepass')
- return json.loads(s, *args, **kwargs)
+ return json.loads(s, *args, **kwargs)
- else:
- json_loads = json.loads
else:
- import cStringIO
-
- xrange = xrange
- unicode = unicode
- bytechr = chr
- byterepr = repr
- bytestr = str
- iterbytestr = iter
- maybebytestr = identity
- sysbytes = identity
- sysstr = identity
- strurl = identity
- bytesurl = identity
- open = open
- delattr = delattr
- getattr = getattr
- hasattr = hasattr
- setattr = setattr
-
- # this can't be parsed on Python 3
- exec(b'def raisewithtb(exc, tb):\n raise exc, None, tb\n')
-
- def fsencode(filename):
- """
- Partial backport from os.py in Python 3, which only accepts bytes.
- In Python 2, our paths should only ever be bytes, a unicode path
- indicates a bug.
- """
- if isinstance(filename, str):
- return filename
- else:
- raise TypeError("expect str, not %s" % type(filename).__name__)
-
- # In Python 2, fsdecode() has a very chance to receive bytes. So it's
- # better not to touch Python 2 part as it's already working fine.
- fsdecode = identity
-
- def getdoc(obj):
- return getattr(obj, '__doc__', None)
-
- _notset = object()
-
- def safehasattr(thing, attr):
- return getattr(thing, attr, _notset) is not _notset
-
- def _getoptbwrapper(orig, args, shortlist, namelist):
- return orig(args, shortlist, namelist)
-
- strkwargs = identity
- byteskwargs = identity
-
- oscurdir = os.curdir
- oslinesep = os.linesep
- osname = os.name
- ospathsep = os.pathsep
- ospardir = os.pardir
- ossep = os.sep
- osaltsep = os.altsep
- osdevnull = os.devnull
- long = long
- if getattr(sys, 'argv', None) is not None:
- sysargv = sys.argv
- sysplatform = sys.platform
- sysexecutable = sys.executable
- shlexsplit = shlex.split
- bytesio = cStringIO.StringIO
- stringio = bytesio
- maplist = map
- rangelist = range
- ziplist = zip
- rawinput = raw_input
- getargspec = inspect.getargspec
- iteritems = lambda x: x.iteritems()
- itervalues = lambda x: x.itervalues()
json_loads = json.loads
isjython = sysplatform.startswith(b'java')