Mercurial > hg
changeset 37290:cc5a040fe150
wireproto: syntax for encoding CBOR into frames
We just vendored a library for encoding and decoding the CBOR
data format. While the intent of that vendoring was to support state
files, CBOR is really a nice data format. It is extensible and
compact.
I've been feeling dirty inventing my own data formats for
frame payloads. While custom formats can always beat out a generic
format, there is a cost to be paid in terms of implementation,
comprehension, etc. CBOR is compact enough that I'm not too
worried about efficiency loss. I think the benefits of using
a standardized format outweigh rolling our own formats. So
I plan to make heavy use of CBOR in the wire protocol going
forward.
This commit teaches our function that makes a frame from a human
readable string (``makeframefromhumanstring``) to encode CBOR data
in frame payloads.
We do need to employ some low-level Python code in order to
evaluate a string as a Python expression. But other than that,
this should hopefully be pretty straightforward.
Unit tests for this function have been added.
Differential Revision: https://phab.mercurial-scm.org/D2948
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Wed, 28 Mar 2018 15:05:39 -0700 |
parents | 5fadc63ac99f |
children | b0041036214e |
files | mercurial/debugcommands.py mercurial/utils/stringutil.py mercurial/wireprotoframing.py tests/test-wireproto-serverreactor.py |
diffstat | 4 files changed, 98 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/debugcommands.py Mon Mar 26 13:59:56 2018 -0700 +++ b/mercurial/debugcommands.py Wed Mar 28 15:05:39 2018 -0700 @@ -2793,7 +2793,10 @@ or a flag name for stream flags or frame flags, respectively. Values are resolved to integers and then bitwise OR'd together. - ``payload`` is is evaluated as a Python byte string literal. + ``payload`` represents the raw frame payload. If it begins with + ``cbor:``, the following string is evaluated as Python code and the + resulting object is fed into a CBOR encoder. Otherwise it is interpreted + as a Python byte string literal. """ opts = pycompat.byteskwargs(opts)
--- a/mercurial/utils/stringutil.py Mon Mar 26 13:59:56 2018 -0700 +++ b/mercurial/utils/stringutil.py Wed Mar 28 15:05:39 2018 -0700 @@ -9,6 +9,7 @@ from __future__ import absolute_import +import __future__ import codecs import re as remod import textwrap @@ -497,3 +498,29 @@ If s is not a valid boolean, returns None. """ return _booleans.get(s.lower(), None) + +def evalpython(s): + """Evaluate a string containing a Python expression. + + THIS FUNCTION IS NOT SAFE TO USE ON UNTRUSTED INPUT. IT'S USE SHOULD BE + LIMITED TO DEVELOPER-FACING FUNCTIONALITY. + """ + globs = { + r'__builtins__': { + r'None': None, + r'False': False, + r'True': True, + r'int': int, + r'set': set, + r'tuple': tuple, + # Don't need to expose dict and list because we can use + # literals. + }, + } + + # We can't use eval() directly because it inherits compiler + # flags from this module and we need unicode literals for Python 3 + # compatibility. + code = compile(s, r'<string>', r'eval', + __future__.unicode_literals.compiler_flag, True) + return eval(code, globs, {})
--- a/mercurial/wireprotoframing.py Mon Mar 26 13:59:56 2018 -0700 +++ b/mercurial/wireprotoframing.py Wed Mar 28 15:05:39 2018 -0700 @@ -16,6 +16,7 @@ from .i18n import _ from .thirdparty import ( attr, + cbor, ) from . import ( error, @@ -156,6 +157,9 @@ def makeframefromhumanstring(s): """Create a frame from a human readable string + DANGER: NOT SAFE TO USE WITH UNTRUSTED INPUT BECAUSE OF POTENTIAL + eval() USAGE. DO NOT USE IN CORE. + Strings have the form: <request-id> <stream-id> <stream-flags> <type> <flags> <payload> @@ -169,6 +173,11 @@ named constant. Flags can be delimited by `|` to bitwise OR them together. + + If the payload begins with ``cbor:``, the following string will be + evaluated as Python code and the resulting object will be fed into + a CBOR encoder. Otherwise, the payload is interpreted as a Python + byte string literal. """ fields = s.split(b' ', 5) requestid, streamid, streamflags, frametype, frameflags, payload = fields @@ -196,7 +205,11 @@ else: finalflags |= int(flag) - payload = stringutil.unescapestr(payload) + if payload.startswith(b'cbor:'): + payload = cbor.dumps(stringutil.evalpython(payload[5:]), canonical=True) + + else: + payload = stringutil.unescapestr(payload) return makeframe(requestid=requestid, streamid=streamid, streamflags=finalstreamflags, typeid=frametype,
--- a/tests/test-wireproto-serverreactor.py Mon Mar 26 13:59:56 2018 -0700 +++ b/tests/test-wireproto-serverreactor.py Wed Mar 28 15:05:39 2018 -0700 @@ -35,6 +35,59 @@ framing.createcommandframes(stream, rid, cmd, args, datafh)) +class FrameHumanStringTests(unittest.TestCase): + def testbasic(self): + self.assertEqual(ffs(b'1 1 0 1 0 '), + b'\x00\x00\x00\x01\x00\x01\x00\x10') + + self.assertEqual(ffs(b'2 4 0 1 0 '), + b'\x00\x00\x00\x02\x00\x04\x00\x10') + + self.assertEqual(ffs(b'2 4 0 1 0 foo'), + b'\x03\x00\x00\x02\x00\x04\x00\x10foo') + + def testcborint(self): + self.assertEqual(ffs(b'1 1 0 1 0 cbor:15'), + b'\x01\x00\x00\x01\x00\x01\x00\x10\x0f') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:42'), + b'\x02\x00\x00\x01\x00\x01\x00\x10\x18*') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:1048576'), + b'\x05\x00\x00\x01\x00\x01\x00\x10\x1a' + b'\x00\x10\x00\x00') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:0'), + b'\x01\x00\x00\x01\x00\x01\x00\x10\x00') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:-1'), + b'\x01\x00\x00\x01\x00\x01\x00\x10 ') + + self.assertEqual(ffs(b'1 1 0 1 0 cbor:-342542'), + b'\x05\x00\x00\x01\x00\x01\x00\x10:\x00\x05:\r') + + def testcborstrings(self): + # String literals should be unicode. + self.assertEqual(ffs(b"1 1 0 1 0 cbor:'foo'"), + b'\x04\x00\x00\x01\x00\x01\x00\x10cfoo') + + self.assertEqual(ffs(b"1 1 0 1 0 cbor:b'foo'"), + b'\x04\x00\x00\x01\x00\x01\x00\x10Cfoo') + + self.assertEqual(ffs(b"1 1 0 1 0 cbor:u'foo'"), + b'\x04\x00\x00\x01\x00\x01\x00\x10cfoo') + + def testcborlists(self): + self.assertEqual(ffs(b"1 1 0 1 0 cbor:[None, True, False, 42, b'foo']"), + b'\n\x00\x00\x01\x00\x01\x00\x10\x85\xf6\xf5\xf4' + b'\x18*Cfoo') + + def testcbordicts(self): + self.assertEqual(ffs(b"1 1 0 1 0 " + b"cbor:{b'foo': b'val1', b'bar': b'val2'}"), + b'\x13\x00\x00\x01\x00\x01\x00\x10\xa2' + b'CbarDval2CfooDval1') + class FrameTests(unittest.TestCase): def testdataexactframesize(self): data = util.bytesio(b'x' * framing.DEFAULT_MAX_FRAME_SIZE)