Mercurial > hg
changeset 43380:579672b347d2 stable
py3: define and use json.loads polyfill
Python 3.5's json.loads() requires a str. Only Python 3.6+
also accepts bytes or bytearray input.
This commit adds a json.loads() polyfill, pycompat.json_loads(), for
Python 3.5 so that callers can pass bytes on every supported Python
version. The helper it uses to detect the input's encoding comes
verbatim from Python 3.7.
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 02 Nov 2019 12:09:35 -0700 |
parents | bb509f39d387 |
children | 9a43cef9ec59 |
files | hgext/bugzilla.py hgext/fix.py hgext/lfs/blobstore.py hgext/lfs/wireprotolfsserver.py hgext/phabricator.py mercurial/pycompat.py tests/get-with-headers.py |
diffstat | 7 files changed, 55 insertions(+), 11 deletions(-) [+] |
line wrap: on
line diff
--- a/hgext/bugzilla.py Sat Nov 02 11:48:38 2019 -0700 +++ b/hgext/bugzilla.py Sat Nov 02 12:09:35 2019 -0700 @@ -955,7 +955,7 @@ def _fetch(self, burl): try: resp = url.open(self.ui, burl) - return json.loads(resp.read()) + return pycompat.json_loads(resp.read()) except util.urlerr.httperror as inst: if inst.code == 401: raise error.Abort(_(b'authorization failed')) @@ -978,7 +978,7 @@ req = request_type(burl, data, {b'Content-Type': b'application/json'}) try: resp = url.opener(self.ui).open(req) - return json.loads(resp.read()) + return pycompat.json_loads(resp.read()) except util.urlerr.httperror as inst: if inst.code == 401: raise error.Abort(_(b'authorization failed'))
--- a/hgext/fix.py Sat Nov 02 11:48:38 2019 -0700 +++ b/hgext/fix.py Sat Nov 02 12:09:35 2019 -0700 @@ -126,7 +126,6 @@ import collections import itertools -import json import os import re import subprocess @@ -642,7 +641,7 @@ if fixer.shouldoutputmetadata(): try: metadatajson, newerdata = stdout.split(b'\0', 1) - metadata[fixername] = json.loads(metadatajson) + metadata[fixername] = pycompat.json_loads(metadatajson) except ValueError: ui.warn( _(b'ignored invalid output from fixer tool: %s\n')
--- a/hgext/lfs/blobstore.py Sat Nov 02 11:48:38 2019 -0700 +++ b/hgext/lfs/blobstore.py Sat Nov 02 12:09:35 2019 -0700 @@ -363,7 +363,7 @@ _(b'LFS error: %s') % _urlerrorreason(ex), hint=hint ) try: - response = json.loads(rawjson) + response = pycompat.json_loads(rawjson) except ValueError: raise LfsRemoteError( _(b'LFS server returns invalid JSON: %s')
--- a/hgext/lfs/wireprotolfsserver.py Sat Nov 02 11:48:38 2019 -0700 +++ b/hgext/lfs/wireprotolfsserver.py Sat Nov 02 12:09:35 2019 -0700 @@ -133,7 +133,7 @@ return True # XXX: specify an encoding? - lfsreq = json.loads(req.bodyfh.read()) + lfsreq = pycompat.json_loads(req.bodyfh.read()) # If no transfer handlers are explicitly requested, 'basic' is assumed. if r'basic' not in lfsreq.get(r'transfers', [r'basic']):
--- a/hgext/phabricator.py Sat Nov 02 11:48:38 2019 -0700 +++ b/hgext/phabricator.py Sat Nov 02 12:09:35 2019 -0700 @@ -152,8 +152,8 @@ value = r1params[key][0] # we want to compare json payloads without worrying about ordering if value.startswith(b'{') and value.endswith(b'}'): - r1json = json.loads(value) - r2json = json.loads(r2params[key][0]) + r1json = pycompat.json_loads(value) + r2json = pycompat.json_loads(r2params[key][0]) if r1json != r2json: return False elif r2params[key][0] != value: @@ -307,7 +307,7 @@ if isinstance(x, pycompat.unicode) else x, # json.loads only accepts bytes from py3.6+ - json.loads(encoding.unifromlocal(body)), + pycompat.json_loads(encoding.unifromlocal(body)), ) if parsed.get(b'error_code'): msg = _(b'Conduit Error (%s): %s') % ( @@ -332,7 +332,7 @@ lambda x: encoding.unitolocal(x) if isinstance(x, pycompat.unicode) else x, - json.loads(rawparams), + pycompat.json_loads(rawparams), ) # json.dumps only accepts unicode strings result = pycompat.rapply(
--- a/mercurial/pycompat.py Sat Nov 02 11:48:38 2019 -0700 +++ b/mercurial/pycompat.py Sat Nov 02 12:09:35 2019 -0700 @@ -12,6 +12,7 @@ import getopt import inspect +import json import os import shlex import sys @@ -88,6 +89,7 @@ if ispy3: import builtins + import codecs import functools import io import struct @@ -340,6 +342,48 @@ iteritems = lambda x: x.items() itervalues = lambda x: x.values() + # Python 3.5's json.load and json.loads require str. We polyfill its + # code for detecting encoding from bytes. + if sys.version_info[0:2] < (3, 6): + + def _detect_encoding(b): + bstartswith = b.startswith + if bstartswith((codecs.BOM_UTF32_BE, codecs.BOM_UTF32_LE)): + return 'utf-32' + if bstartswith((codecs.BOM_UTF16_BE, codecs.BOM_UTF16_LE)): + return 'utf-16' + if bstartswith(codecs.BOM_UTF8): + return 'utf-8-sig' + + if len(b) >= 4: + if not b[0]: + # 00 00 -- -- - utf-32-be + # 00 XX -- -- - utf-16-be + return 'utf-16-be' if b[1] else 'utf-32-be' + if not b[1]: + # XX 00 00 00 - utf-32-le + # XX 00 00 XX - utf-16-le + # XX 00 XX -- - utf-16-le + return 'utf-16-le' if b[2] or b[3] else 'utf-32-le' + elif len(b) == 2: + if not b[0]: + # 00 XX - utf-16-be + return 'utf-16-be' + if not b[1]: + # XX 00 - utf-16-le + return 'utf-16-le' + # default + return 'utf-8' + + def json_loads(s, *args, **kwargs): + if isinstance(s, (bytes, bytearray)): + s = s.decode(_detect_encoding(s), 'surrogatepass') + + return json.loads(s, *args, **kwargs) + + else: + json_loads = json.loads + else: import cStringIO @@ -417,6 +461,7 @@ getargspec = inspect.getargspec iteritems = lambda x: x.iteritems() itervalues = lambda x: x.itervalues() + json_loads = json.loads isjython = sysplatform.startswith(b'java')
--- a/tests/get-with-headers.py Sat Nov 02 11:48:38 2019 -0700 +++ b/tests/get-with-headers.py Sat Nov 02 12:09:35 2019 -0700 @@ -98,7 +98,7 @@ if formatjson: # json.dumps() will print trailing newlines. Eliminate them # to make tests easier to write. - data = json.loads(data) + data = pycompat.json_loads(data) lines = json.dumps(data, sort_keys=True, indent=2).splitlines() for line in lines: bodyfh.write(pycompat.sysbytes(line.rstrip()))