wireproto: convert python literal to object without using unsafe eval()
Follows up cc5a040fe150.
At this point, I don't think we need a real eval(). If we want to support
set literals, maybe we can vendor ast.literal_eval(), which is a relatively
simple function.
--- a/mercurial/utils/stringutil.py Sun Apr 08 12:30:59 2018 +0900
+++ b/mercurial/utils/stringutil.py Sun Apr 08 11:55:46 2018 +0900
@@ -9,7 +9,7 @@
from __future__ import absolute_import
-import __future__
+import ast
import codecs
import re as remod
import textwrap
@@ -499,28 +499,7 @@
"""
return _booleans.get(s.lower(), None)
-def evalpython(s):
- """Evaluate a string containing a Python expression.
-
- THIS FUNCTION IS NOT SAFE TO USE ON UNTRUSTED INPUT. IT'S USE SHOULD BE
- LIMITED TO DEVELOPER-FACING FUNCTIONALITY.
- """
- globs = {
- r'__builtins__': {
- r'None': None,
- r'False': False,
- r'True': True,
- r'int': int,
- r'set': set,
- r'tuple': tuple,
- # Don't need to expose dict and list because we can use
- # literals.
- },
- }
-
- # We can't use eval() directly because it inherits compiler
- # flags from this module and we need unicode literals for Python 3
- # compatibility.
- code = compile(s, r'<string>', r'eval',
- __future__.unicode_literals.compiler_flag, True)
- return eval(code, globs, {})
+def evalpythonliteral(s):
+ """Evaluate a string containing a Python literal expression"""
+ # We could backport our tokenizer hack to rewrite '' to u'' if we want
+ return ast.literal_eval(s)
--- a/mercurial/wireprotoframing.py Sun Apr 08 12:30:59 2018 +0900
+++ b/mercurial/wireprotoframing.py Sun Apr 08 11:55:46 2018 +0900
@@ -180,9 +180,6 @@
def makeframefromhumanstring(s):
"""Create a frame from a human readable string
- DANGER: NOT SAFE TO USE WITH UNTRUSTED INPUT BECAUSE OF POTENTIAL
- eval() USAGE. DO NOT USE IN CORE.
-
Strings have the form:
<request-id> <stream-id> <stream-flags> <type> <flags> <payload>
@@ -198,7 +195,7 @@
Flags can be delimited by `|` to bitwise OR them together.
If the payload begins with ``cbor:``, the following string will be
- evaluated as Python code and the resulting object will be fed into
+ evaluated as a Python literal and the resulting object will be fed into
a CBOR encoder. Otherwise, the payload is interpreted as a Python
byte string literal.
"""
@@ -229,7 +226,8 @@
finalflags |= int(flag)
if payload.startswith(b'cbor:'):
- payload = cbor.dumps(stringutil.evalpython(payload[5:]), canonical=True)
+ payload = cbor.dumps(stringutil.evalpythonliteral(payload[5:]),
+ canonical=True)
else:
payload = stringutil.unescapestr(payload)
--- a/tests/test-wireproto-serverreactor.py Sun Apr 08 12:30:59 2018 +0900
+++ b/tests/test-wireproto-serverreactor.py Sun Apr 08 11:55:46 2018 +0900
@@ -70,10 +70,6 @@
b'\x05\x00\x00\x01\x00\x01\x00\x10:\x00\x05:\r')
def testcborstrings(self):
- # String literals should be unicode.
- self.assertEqual(ffs(b"1 1 0 1 0 cbor:'foo'"),
- b'\x04\x00\x00\x01\x00\x01\x00\x10cfoo')
-
self.assertEqual(ffs(b"1 1 0 1 0 cbor:b'foo'"),
b'\x04\x00\x00\x01\x00\x01\x00\x10Cfoo')