changeset 22426:f6b533e64ed6

encoding: add json escaping filter. This ends up here because it needs to be somewhat encoding aware.
author Matt Mackall <mpm@selenic.com>
date Mon, 15 Sep 2014 13:12:49 -0500
parents 6fd944c204a9
children bd15932846a4
files mercurial/encoding.py
diffstat 1 files changed, 43 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/encoding.py	Mon Sep 15 13:12:20 2014 -0500
+++ b/mercurial/encoding.py	Mon Sep 15 13:12:49 2014 -0500
@@ -302,6 +302,49 @@
     except LookupError, k:
         raise error.Abort(k, hint="please check your locale settings")
 
+_jsonmap = {}  # char -> JSON-escaped replacement; starts empty, filled on first jsonescape() call
+
+def jsonescape(s):
+    '''returns a string suitable for JSON
+
+    JSON is problematic for us because it doesn't support non-Unicode
+    bytes. To deal with this, we take the following approach:
+
+    - localstr objects are converted back to UTF-8
+    - valid UTF-8/ASCII strings are passed as-is
+    - other strings are converted to UTF-8b surrogate encoding
+    - apply JSON-specified string escaping
+
+    (escapes are doubled in these tests)
+
+    >>> jsonescape('this is a test')
+    'this is a test'
+    >>> jsonescape('escape characters: \\0 \\x0b \\t \\n \\r \\" \\\\')
+    'escape characters: \\\\u0000 \\\\u000b \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
+    >>> jsonescape('a weird byte: \\xdd')
+    'a weird byte: \\xed\\xb3\\x9d'
+    >>> jsonescape('utf-8: caf\\xc3\\xa9')
+    'utf-8: caf\\xc3\\xa9'
+    >>> jsonescape('')
+    ''
+    '''
+
+    if not _jsonmap:  # table is still empty: build it once, reuse it on later calls
+        for x in xrange(32):  # chr(0)..chr(31) get the generic six-character \uXXXX form
+            _jsonmap[chr(x)] = "\u%04x" %x
+        for x in xrange(32, 256):  # every other single-byte value maps to itself
+            c = chr(x)
+            _jsonmap[c] = c
+        _jsonmap['\t'] = '\\t'  # the entries below overwrite the defaults set above
+        _jsonmap['\n'] = '\\n'  # with the two-character JSON escapes
+        _jsonmap['\"'] = '\\"'
+        _jsonmap['\\'] = '\\\\'
+        _jsonmap['\b'] = '\\b'
+        _jsonmap['\f'] = '\\f'
+        _jsonmap['\r'] = '\\r'
+
+    return ''.join(_jsonmap[c] for c in toutf8b(s))  # escape each char of the UTF-8b form of s
+
+
 def toutf8b(s):
     '''convert a local, possibly-binary string into UTF-8b