py3: define and use json.loads polyfill
Python 3.5's json.loads() requires a str. Only Python 3.6+
supports passing a bytes or bytearray.
This commit implements a json.loads() polyfill on Python 3.5
so that we can use bytes. The added function to detect encodings
comes verbatim from Python 3.7.
--- a/hgext/bugzilla.py Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/bugzilla.py Sat Nov 02 12:09:35 2019 -0700
@@ -955,7 +955,7 @@
def _fetch(self, burl):
try:
resp = url.open(self.ui, burl)
- return json.loads(resp.read())
+ return pycompat.json_loads(resp.read())
except util.urlerr.httperror as inst:
if inst.code == 401:
raise error.Abort(_(b'authorization failed'))
@@ -978,7 +978,7 @@
req = request_type(burl, data, {b'Content-Type': b'application/json'})
try:
resp = url.opener(self.ui).open(req)
- return json.loads(resp.read())
+ return pycompat.json_loads(resp.read())
except util.urlerr.httperror as inst:
if inst.code == 401:
raise error.Abort(_(b'authorization failed'))
--- a/hgext/fix.py Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/fix.py Sat Nov 02 12:09:35 2019 -0700
@@ -126,7 +126,6 @@
import collections
import itertools
-import json
import os
import re
import subprocess
@@ -642,7 +641,7 @@
if fixer.shouldoutputmetadata():
try:
metadatajson, newerdata = stdout.split(b'\0', 1)
- metadata[fixername] = json.loads(metadatajson)
+ metadata[fixername] = pycompat.json_loads(metadatajson)
except ValueError:
ui.warn(
_(b'ignored invalid output from fixer tool: %s\n')
--- a/hgext/lfs/blobstore.py Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/lfs/blobstore.py Sat Nov 02 12:09:35 2019 -0700
@@ -363,7 +363,7 @@
_(b'LFS error: %s') % _urlerrorreason(ex), hint=hint
)
try:
- response = json.loads(rawjson)
+ response = pycompat.json_loads(rawjson)
except ValueError:
raise LfsRemoteError(
_(b'LFS server returns invalid JSON: %s')
--- a/hgext/lfs/wireprotolfsserver.py Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/lfs/wireprotolfsserver.py Sat Nov 02 12:09:35 2019 -0700
@@ -133,7 +133,7 @@
return True
# XXX: specify an encoding?
- lfsreq = json.loads(req.bodyfh.read())
+ lfsreq = pycompat.json_loads(req.bodyfh.read())
# If no transfer handlers are explicitly requested, 'basic' is assumed.
if r'basic' not in lfsreq.get(r'transfers', [r'basic']):
--- a/hgext/phabricator.py Sat Nov 02 11:48:38 2019 -0700
+++ b/hgext/phabricator.py Sat Nov 02 12:09:35 2019 -0700
@@ -152,8 +152,8 @@
value = r1params[key][0]
# we want to compare json payloads without worrying about ordering
if value.startswith(b'{') and value.endswith(b'}'):
- r1json = json.loads(value)
- r2json = json.loads(r2params[key][0])
+ r1json = pycompat.json_loads(value)
+ r2json = pycompat.json_loads(r2params[key][0])
if r1json != r2json:
return False
elif r2params[key][0] != value:
@@ -307,7 +307,7 @@
if isinstance(x, pycompat.unicode)
else x,
# json.loads only accepts bytes from py3.6+
- json.loads(encoding.unifromlocal(body)),
+ pycompat.json_loads(encoding.unifromlocal(body)),
)
if parsed.get(b'error_code'):
msg = _(b'Conduit Error (%s): %s') % (
@@ -332,7 +332,7 @@
lambda x: encoding.unitolocal(x)
if isinstance(x, pycompat.unicode)
else x,
- json.loads(rawparams),
+ pycompat.json_loads(rawparams),
)
# json.dumps only accepts unicode strings
result = pycompat.rapply(
--- a/mercurial/pycompat.py Sat Nov 02 11:48:38 2019 -0700
+++ b/mercurial/pycompat.py Sat Nov 02 12:09:35 2019 -0700
@@ -12,6 +12,7 @@
import getopt
import inspect
+import json
import os
import shlex
import sys
@@ -88,6 +89,7 @@
if ispy3:
import builtins
+ import codecs
import functools
import io
import struct
@@ -340,6 +342,48 @@
iteritems = lambda x: x.items()
itervalues = lambda x: x.values()
+ # Python 3.5's json.load and json.loads require str. We polyfill its
+ # code for detecting encoding from bytes.
+ if sys.version_info[0:2] < (3, 6):
+
+ def _detect_encoding(b):
+ # Return the name of the codec that json_loads() should use to decode
+ # the bytes-like JSON payload ``b``.
+ #
+ # A leading byte order mark (BOM) decides the answer when present.
+ # Otherwise the encoding is guessed from where zero bytes fall among
+ # the first bytes of the payload. Payloads that match none of the
+ # patterns (including lengths 0, 1 and 3) are reported as 'utf-8'.
+ bstartswith = b.startswith
+ # UTF-32 has to be tested before UTF-16: the UTF-32-LE BOM begins with
+ # the same two bytes as the UTF-16-LE BOM.
+ if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)):
+ return 'utf-32'
+ if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)):
+ return 'utf-16'
+ if bstartswith(codecs.BOM_UTF8):
+ return 'utf-8-sig'
+
+ # No BOM. In the patterns below, 00 is a zero byte, XX is a non-zero
+ # byte and -- is a byte that is not inspected.
+ if len(b) >= 4:
+ if not b[0]:
+ # 00 00 -- -- - utf-32-be
+ # 00 XX -- -- - utf-16-be
+ return 'utf-16-be' if b[1] else 'utf-32-be'
+ if not b[1]:
+ # XX 00 00 00 - utf-32-le
+ # XX 00 00 XX - utf-16-le
+ # XX 00 XX -- - utf-16-le
+ return 'utf-16-le' if b[2] or b[3] else 'utf-32-le'
+ elif len(b) == 2:
+ # Exactly two bytes: only the UTF-16 patterns can apply.
+ if not b[0]:
+ # 00 XX - utf-16-be
+ return 'utf-16-be'
+ if not b[1]:
+ # XX 00 - utf-16-le
+ return 'utf-16-le'
+ # default
+ return 'utf-8'
+
+ def json_loads(s, *args, **kwargs):
+     """json.loads() replacement that also takes bytes or bytearray.
+
+     Binary input is first decoded to str with the codec chosen by
+     _detect_encoding(); any other input is handed to json.loads()
+     untouched. Extra positional and keyword arguments are forwarded.
+     """
+     if not isinstance(s, (bytes, bytearray)):
+         return json.loads(s, *args, **kwargs)
+
+     text = s.decode(_detect_encoding(s), 'surrogatepass')
+     return json.loads(text, *args, **kwargs)
+
+ else:
+ json_loads = json.loads
+
else:
import cStringIO
@@ -417,6 +461,7 @@
getargspec = inspect.getargspec
iteritems = lambda x: x.iteritems()
itervalues = lambda x: x.itervalues()
+ json_loads = json.loads
isjython = sysplatform.startswith(b'java')
--- a/tests/get-with-headers.py Sat Nov 02 11:48:38 2019 -0700
+++ b/tests/get-with-headers.py Sat Nov 02 12:09:35 2019 -0700
@@ -98,7 +98,7 @@
if formatjson:
# json.dumps() will print trailing newlines. Eliminate them
# to make tests easier to write.
- data = json.loads(data)
+ data = pycompat.json_loads(data)
lines = json.dumps(data, sort_keys=True, indent=2).splitlines()
for line in lines:
bodyfh.write(pycompat.sysbytes(line.rstrip()))