changeset 26843:f580c78ea667

uescape: also encode non-printable chars under 128. We were assuming everything under 128 was printable ASCII, but there are a lot of control characters in that range that can't simply be included in JSON and other targets. We forcibly encode everything under 32 (0x20), because those characters are either control characters or oddly printable (like tab or line endings). We also add the hypothesis-powered test that caught this.
author Pierre-Yves David <pierre-yves.david@fb.com>
date Mon, 02 Nov 2015 11:56:59 +0000
parents 0f76c64f5cc3
children e24eee55c129
files mercurial/templatefilters.py tests/test-template-engine.t
diffstat 2 files changed, 14 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/templatefilters.py	Sat Oct 24 12:46:03 2015 +0100
+++ b/mercurial/templatefilters.py	Mon Nov 02 11:56:59 2015 +0000
@@ -219,7 +219,7 @@
         raise TypeError('cannot encode type %s' % obj.__class__.__name__)
 
 def _uescape(c):
-    if ord(c) < 0x80:
+    if 0x20 <= ord(c) < 0x80:
         return c
     else:
         return '\\u%04x' % ord(c)
--- a/tests/test-template-engine.t	Sat Oct 24 12:46:03 2015 +0100
+++ b/tests/test-template-engine.t	Mon Nov 02 11:56:59 2015 +0000
@@ -44,4 +44,17 @@
   0 97e5f848f0936960273bbf75be6388cd0350a32b -1 0000000000000000000000000000000000000000
   -1 0000000000000000000000000000000000000000 -1 0000000000000000000000000000000000000000
 
+Fuzzing the unicode escaper to ensure it produces valid data
+
+#if hypothesis
+
+  >>> from hypothesishelpers import *
+  >>> import mercurial.templatefilters as tf
+  >>> import json
+  >>> @check(st.text().map(lambda s: s.encode('utf-8')))
+  ... def testtfescapeproducesvalidjson(text):
+  ...     json.loads('"' + tf.jsonescape(text) + '"')
+
+#endif
+
   $ cd ..