Mercurial > hg
comparison tests/test-cbor.py @ 37711:65a23cc8e75b
cborutil: implement support for streaming encoding, bytestring decoding
The vendored cbor2 package is... a bit disappointing.
On the encoding side, it insists that you pass it something with
a write() to send data to. That means if you want to emit data to
a generator, you have to construct e.g. an io.BytesIO(), write()
to it, then get the data back out. There can be non-trivial overhead
involved.
The encoder also doesn't support indefinite types - bytestrings, arrays,
and maps that don't have a known length. Again, this is really
unfortunate because it requires you to buffer the entire source and
destination in memory to encode large things.
On the decoding side, it supports reading indefinite length types.
But it buffers them completely before returning. More sadness.
This commit implements "streaming" encoders for various CBOR types.
Encoding emits a generator of hunks. So you can efficiently stream
encoded data elsewhere.
It also implements support for emitting indefinite length bytestrings,
arrays, and maps.
On the decoding side, we only implement support for decoding an
indefinite length bytestring from a file object. It will emit a
generator of raw chunks from the source.
I didn't want to reinvent so many wheels. But profiling the wire
protocol revealed that constructing io.BytesIO()
instances to temporarily hold results has a non-trivial overhead.
We're talking >15% of execution time for operations like
"transfer the fulltexts of all files in a revision." So I can
justify this effort.
Fortunately, CBOR is a relatively straightforward format. And we have
a reference implementation in the repo we can test against.
Differential Revision: https://phab.mercurial-scm.org/D3303
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 14 Apr 2018 16:36:15 -0700 |
parents | |
children | 2b3b6187c316 |
comparison
equal
deleted
inserted
replaced
37710:0a5fe2a08e82 | 37711:65a23cc8e75b |
---|---|
1 from __future__ import absolute_import | |
2 | |
3 import io | |
4 import unittest | |
5 | |
6 from mercurial.thirdparty import ( | |
7 cbor, | |
8 ) | |
9 from mercurial.utils import ( | |
10 cborutil, | |
11 ) | |
12 | |
def loadit(it):
    """Concatenate the byte chunks in ``it`` and decode with cbor.loads()."""
    encoded = b''.join(it)
    return cbor.loads(encoded)
15 | |
class BytestringTests(unittest.TestCase):
    """Tests for bytestring encoding and indefinite bytestring reading."""

    def testsimple(self):
        # A short bytestring is emitted as a header chunk plus the payload.
        self.assertEqual(
            list(cborutil.streamencode(b'foobar')),
            [b'\x46', b'foobar'])

        self.assertEqual(
            loadit(cborutil.streamencode(b'foobar')),
            b'foobar')

    def testlong(self):
        source = b'x' * 1048576

        self.assertEqual(loadit(cborutil.streamencode(source)), source)

    def testfromiter(self):
        # This is the example from RFC 7049 Section 2.2.2.
        source = [b'\xaa\xbb\xcc\xdd', b'\xee\xff\x99']

        self.assertEqual(
            list(cborutil.streamencodebytestringfromiter(source)),
            [
                b'\x5f',
                b'\x44',
                b'\xaa\xbb\xcc\xdd',
                b'\x43',
                b'\xee\xff\x99',
                b'\xff',
            ])

        self.assertEqual(
            loadit(cborutil.streamencodebytestringfromiter(source)),
            b''.join(source))

    def testfromiterlarge(self):
        source = [b'a' * 16, b'b' * 128, b'c' * 1024, b'd' * 1048576]

        self.assertEqual(
            loadit(cborutil.streamencodebytestringfromiter(source)),
            b''.join(source))

    def testindefinite(self):
        source = b'\x00\x01\x02\x03' + b'\xff' * 16384

        it = cborutil.streamencodeindefinitebytestring(source, chunksize=2)

        # With chunksize=2 every chunk is preceded by its own header.
        self.assertEqual(next(it), b'\x5f')
        self.assertEqual(next(it), b'\x42')
        self.assertEqual(next(it), b'\x00\x01')
        self.assertEqual(next(it), b'\x42')
        self.assertEqual(next(it), b'\x02\x03')
        self.assertEqual(next(it), b'\x42')
        self.assertEqual(next(it), b'\xff\xff')

        dest = b''.join(cborutil.streamencodeindefinitebytestring(
            source, chunksize=42))
        # ``source`` is already a single bytes object, so compare against it
        # directly. b''.join(source) only works on Python 2; on Python 3
        # iterating bytes yields ints and join() raises TypeError.
        self.assertEqual(cbor.loads(dest), source)

    def testreadtoiter(self):
        source = io.BytesIO(b'\x5f\x44\xaa\xbb\xcc\xdd\x43\xee\xff\x99\xff')

        it = cborutil.readindefinitebytestringtoiter(source)
        self.assertEqual(next(it), b'\xaa\xbb\xcc\xdd')
        self.assertEqual(next(it), b'\xee\xff\x99')

        # The iterator must be exhausted once both chunks have been read.
        with self.assertRaises(StopIteration):
            next(it)
83 | |
class IntTests(unittest.TestCase):
    """Tests for integer encoding."""

    def testsmall(self):
        # (value, expected single encoded chunk)
        cases = [
            (0, b'\x00'),
            (1, b'\x01'),
            (2, b'\x02'),
            (3, b'\x03'),
            (4, b'\x04'),
        ]
        for value, expected in cases:
            self.assertEqual(list(cborutil.streamencode(value)), [expected])

    def testnegativesmall(self):
        # (value, expected single encoded chunk)
        cases = [
            (-1, b'\x20'),
            (-2, b'\x21'),
            (-3, b'\x22'),
            (-4, b'\x23'),
            (-5, b'\x24'),
        ]
        for value, expected in cases:
            self.assertEqual(list(cborutil.streamencode(value)), [expected])

    def testrange(self):
        # Compare against the reference encoder across a wide range.
        for value in range(-70000, 70000, 10):
            encoded = b''.join(cborutil.streamencode(value))
            self.assertEqual(encoded, cbor.dumps(value))
104 | |
class ArrayTests(unittest.TestCase):
    """Tests for array (list/tuple) encoding, definite and from iterators."""

    def testempty(self):
        self.assertEqual(list(cborutil.streamencode([])), [b'\x80'])
        self.assertEqual(loadit(cborutil.streamencode([])), [])

    def testbasic(self):
        source = [b'foo', b'bar', 1, -10]
        expected = [
            b'\x84',
            b'\x43', b'foo',
            b'\x43', b'bar',
            b'\x01',
            b'\x29',
        ]

        self.assertEqual(list(cborutil.streamencode(source)), expected)

    def testemptyfromiter(self):
        encoded = b''.join(cborutil.streamencodearrayfromiter([]))
        self.assertEqual(encoded, b'\x9f\xff')

    def testfromiter1(self):
        source = [b'foo']
        expected = [b'\x9f', b'\x43', b'foo', b'\xff']

        self.assertEqual(
            list(cborutil.streamencodearrayfromiter(source)), expected)

        dest = b''.join(cborutil.streamencodearrayfromiter(source))
        self.assertEqual(cbor.loads(dest), source)

    def testtuple(self):
        source = (b'foo', None, 42)

        # The decoded value is compared against the list form of the tuple.
        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cbor.loads(encoded), list(source))
137 | |
class SetTests(unittest.TestCase):
    """Tests for set encoding."""

    def testempty(self):
        expected = [b'\xd9\x01\x02', b'\x80']
        self.assertEqual(list(cborutil.streamencode(set())), expected)

    def testset(self):
        source = {b'foo', None, 42}

        encoded = b''.join(cborutil.streamencode(source))
        self.assertEqual(cbor.loads(encoded), source)
150 | |
class BoolTests(unittest.TestCase):
    """Tests for boolean encoding."""

    def testbasic(self):
        # Check the raw encoded chunk first, then the round-tripped value.
        for value, expected in ((True, b'\xf5'), (False, b'\xf4')):
            self.assertEqual(list(cborutil.streamencode(value)), [expected])

        for value in (True, False):
            self.assertIs(loadit(cborutil.streamencode(value)), value)
158 | |
class NoneTests(unittest.TestCase):
    """Tests for None encoding."""

    def testbasic(self):
        chunks = list(cborutil.streamencode(None))
        self.assertEqual(chunks, [b'\xf6'])

        decoded = loadit(cborutil.streamencode(None))
        self.assertIs(decoded, None)
164 | |
class MapTests(unittest.TestCase):
    """Tests for map (dict) encoding, definite and from iterators."""

    def testempty(self):
        self.assertEqual(list(cborutil.streamencode({})), [b'\xa0'])
        self.assertEqual(loadit(cborutil.streamencode({})), {})

    def testemptyindefinite(self):
        chunks = list(cborutil.streamencodemapfromiter([]))
        self.assertEqual(chunks, [b'\xbf', b'\xff'])

        self.assertEqual(loadit(cborutil.streamencodemapfromiter([])), {})

    def testone(self):
        source = {b'foo': b'bar'}
        expected = [b'\xa1', b'\x43', b'foo', b'\x43', b'bar']

        self.assertEqual(list(cborutil.streamencode(source)), expected)
        self.assertEqual(loadit(cborutil.streamencode(source)), source)

    def testmultiple(self):
        source = {
            b'foo': b'bar',
            b'baz': b'value1',
        }

        # Round-trip through the definite encoder, then the iterator one.
        self.assertEqual(loadit(cborutil.streamencode(source)), source)

        pairs = source.items()
        self.assertEqual(
            loadit(cborutil.streamencodemapfromiter(pairs)), source)

    def testcomplex(self):
        source = {
            b'key': 1,
            2: -10,
        }

        # Round-trip through the definite encoder, then the iterator one.
        self.assertEqual(loadit(cborutil.streamencode(source)), source)

        pairs = source.items()
        self.assertEqual(
            loadit(cborutil.streamencodemapfromiter(pairs)), source)
207 | |
if __name__ == '__main__':
    # Imported here so the runner is only needed when run as a script.
    import silenttestrunner as runner
    runner.main(__name__)