changeset 43380:579672b347d2 stable

py3: define and use json.loads polyfill

Python 3.5's json.loads() requires a str. Only Python 3.6+ supports passing a bytes or bytearray.

This commit implements a json.loads() polyfill on Python 3.5 so that we can use bytes. The added function to detect encodings comes verbatim from Python 3.7.
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 02 Nov 2019 12:09:35 -0700
parents bb509f39d387
children 9a43cef9ec59
files hgext/bugzilla.py hgext/fix.py hgext/lfs/blobstore.py hgext/lfs/wireprotolfsserver.py hgext/phabricator.py mercurial/pycompat.py tests/get-with-headers.py
diffstat 7 files changed, 55 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/hgext/bugzilla.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/bugzilla.py	Sat Nov 02 12:09:35 2019 -0700
@@ -955,7 +955,7 @@
     def _fetch(self, burl):
         try:
             resp = url.open(self.ui, burl)
-            return json.loads(resp.read())
+            return pycompat.json_loads(resp.read())
         except util.urlerr.httperror as inst:
             if inst.code == 401:
                 raise error.Abort(_(b'authorization failed'))
@@ -978,7 +978,7 @@
         req = request_type(burl, data, {b'Content-Type': b'application/json'})
         try:
             resp = url.opener(self.ui).open(req)
-            return json.loads(resp.read())
+            return pycompat.json_loads(resp.read())
         except util.urlerr.httperror as inst:
             if inst.code == 401:
                 raise error.Abort(_(b'authorization failed'))
--- a/hgext/fix.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/fix.py	Sat Nov 02 12:09:35 2019 -0700
@@ -126,7 +126,6 @@
 
 import collections
 import itertools
-import json
 import os
 import re
 import subprocess
@@ -642,7 +641,7 @@
             if fixer.shouldoutputmetadata():
                 try:
                     metadatajson, newerdata = stdout.split(b'\0', 1)
-                    metadata[fixername] = json.loads(metadatajson)
+                    metadata[fixername] = pycompat.json_loads(metadatajson)
                 except ValueError:
                     ui.warn(
                         _(b'ignored invalid output from fixer tool: %s\n')
--- a/hgext/lfs/blobstore.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/lfs/blobstore.py	Sat Nov 02 12:09:35 2019 -0700
@@ -363,7 +363,7 @@
                 _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
             )
         try:
-            response = json.loads(rawjson)
+            response = pycompat.json_loads(rawjson)
         except ValueError:
             raise LfsRemoteError(
                 _(b'LFS server returns invalid JSON: %s')
--- a/hgext/lfs/wireprotolfsserver.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/lfs/wireprotolfsserver.py	Sat Nov 02 12:09:35 2019 -0700
@@ -133,7 +133,7 @@
         return True
 
     # XXX: specify an encoding?
-    lfsreq = json.loads(req.bodyfh.read())
+    lfsreq = pycompat.json_loads(req.bodyfh.read())
 
     # If no transfer handlers are explicitly requested, 'basic' is assumed.
     if r'basic' not in lfsreq.get(r'transfers', [r'basic']):
--- a/hgext/phabricator.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/phabricator.py	Sat Nov 02 12:09:35 2019 -0700
@@ -152,8 +152,8 @@
             value = r1params[key][0]
             # we want to compare json payloads without worrying about ordering
             if value.startswith(b'{') and value.endswith(b'}'):
-                r1json = json.loads(value)
-                r2json = json.loads(r2params[key][0])
+                r1json = pycompat.json_loads(value)
+                r2json = pycompat.json_loads(r2params[key][0])
                 if r1json != r2json:
                     return False
             elif r2params[key][0] != value:
@@ -307,7 +307,7 @@
         if isinstance(x, pycompat.unicode)
         else x,
         # json.loads only accepts bytes from py3.6+
-        json.loads(encoding.unifromlocal(body)),
+        pycompat.json_loads(encoding.unifromlocal(body)),
     )
     if parsed.get(b'error_code'):
         msg = _(b'Conduit Error (%s): %s') % (
@@ -332,7 +332,7 @@
         lambda x: encoding.unitolocal(x)
         if isinstance(x, pycompat.unicode)
         else x,
-        json.loads(rawparams),
+        pycompat.json_loads(rawparams),
     )
     # json.dumps only accepts unicode strings
     result = pycompat.rapply(
--- a/mercurial/pycompat.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/mercurial/pycompat.py	Sat Nov 02 12:09:35 2019 -0700
@@ -12,6 +12,7 @@
 
 import getopt
 import inspect
+import json
 import os
 import shlex
 import sys
@@ -88,6 +89,7 @@
 
 if ispy3:
     import builtins
+    import codecs
     import functools
     import io
     import struct
@@ -340,6 +342,48 @@
     iteritems = lambda x: x.items()
     itervalues = lambda x: x.values()
 
+    # Python 3.5's json.load and json.loads require str. We polyfill its
+    # code for detecting encoding from bytes.
+    if sys.version_info[0:2] < (3, 6):
+
+        def _detect_encoding(b):
+            bstartswith = b.startswith
+            if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
+                return 'utf-32'
+            if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
+                return 'utf-16'
+            if bstartswith(codecs.BOM_UTF8):
+                return 'utf-8-sig'
+
+            if len(b) >= 4:
+                if not b[0]:
+                    # 00 00 -- -- - utf-32-be
+                    # 00 XX -- -- - utf-16-be
+                    return 'utf-16-be' if b[1] else 'utf-32-be'
+                if not b[1]:
+                    # XX 00 00 00 - utf-32-le
+                    # XX 00 00 XX - utf-16-le
+                    # XX 00 XX -- - utf-16-le
+                    return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+            elif len(b) == 2:
+                if not b[0]:
+                    # 00 XX - utf-16-be
+                    return 'utf-16-be'
+                if not b[1]:
+                    # XX 00 - utf-16-le
+                    return 'utf-16-le'
+            # default
+            return 'utf-8'
+
+        def json_loads(s, *args, **kwargs):
+            if isinstance(s, (bytes, bytearray)):
+                s = s.decode(_detect_encoding(s), 'surrogatepass')
+
+            return json.loads(s, *args, **kwargs)
+
+    else:
+        json_loads = json.loads
+
 else:
     import cStringIO
 
@@ -417,6 +461,7 @@
     getargspec = inspect.getargspec
     iteritems = lambda x: x.iteritems()
     itervalues = lambda x: x.itervalues()
+    json_loads = json.loads
 
 isjython = sysplatform.startswith(b'java')
 
--- a/tests/get-with-headers.py	Sat Nov 02 11:48:38 2019 -0700
+++ b/tests/get-with-headers.py	Sat Nov 02 12:09:35 2019 -0700
@@ -98,7 +98,7 @@
         if formatjson:
             # json.dumps() will print trailing newlines. Eliminate them
             # to make tests easier to write.
-            data = json.loads(data)
+            data = pycompat.json_loads(data)
             lines = json.dumps(data, sort_keys=True, indent=2).splitlines()
             for line in lines:
                 bodyfh.write(pycompat.sysbytes(line.rstrip()))