diff mercurial/pycompat.py @ 43380:579672b347d2 stable

py3: define and use json.loads polyfill Python 3.5's json.loads() requires a str. Only Python 3.6+ supports passing a bytes or bytearray. This commit implements a json.loads() polyfill on Python 3.5 so that we can use bytes. The added function to detect encodings comes verbatim from Python 3.7.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 02 Nov 2019 12:09:35 -0700
parents 8ff1ecfadcd1
children 93f74a7d3f07
line wrap: on
line diff
--- a/mercurial/pycompat.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/mercurial/pycompat.py	Sat Nov 02 12:09:35 2019 -0700
@@ -12,6 +12,7 @@
 
 import getopt
 import inspect
+import json
 import os
 import shlex
 import sys
@@ -88,6 +89,7 @@
 
 if ispy3:
     import builtins
+    import codecs
     import functools
     import io
     import struct
@@ -340,6 +342,48 @@
     iteritems = lambda x: x.items()
     itervalues = lambda x: x.values()
 
+    # Python 3.5's json.load and json.loads require str. We polyfill its
+    # code for detecting encoding from bytes.
+    if sys.version_info[0:2] < (3, 6):
+
+        def _detect_encoding(b):
+            bstartswith = b.startswith
+            if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
+                return 'utf-32'
+            if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
+                return 'utf-16'
+            if bstartswith(codecs.BOM_UTF8):
+                return 'utf-8-sig'
+
+            if len(b) >= 4:
+                if not b[0]:
+                    # 00 00 -- -- - utf-32-be
+                    # 00 XX -- -- - utf-16-be
+                    return 'utf-16-be' if b[1] else 'utf-32-be'
+                if not b[1]:
+                    # XX 00 00 00 - utf-32-le
+                    # XX 00 00 XX - utf-16-le
+                    # XX 00 XX -- - utf-16-le
+                    return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+            elif len(b) == 2:
+                if not b[0]:
+                    # 00 XX - utf-16-be
+                    return 'utf-16-be'
+                if not b[1]:
+                    # XX 00 - utf-16-le
+                    return 'utf-16-le'
+            # default
+            return 'utf-8'
+
+        def json_loads(s, *args, **kwargs):
+            if isinstance(s, (bytes, bytearray)):
+                s = s.decode(_detect_encoding(s), 'surrogatepass')
+
+            return json.loads(s, *args, **kwargs)
+
+    else:
+        json_loads = json.loads
+
 else:
     import cStringIO
 
@@ -417,6 +461,7 @@
     getargspec = inspect.getargspec
     iteritems = lambda x: x.iteritems()
     itervalues = lambda x: x.itervalues()
+    json_loads = json.loads
 
 isjython = sysplatform.startswith(b'java')