Mercurial > hg
comparison contrib/python-zstandard/tests/test_compressor.py @ 40121:73fef626dae3
zstandard: vendor python-zstandard 0.10.1
This was just released.
The upstream source distribution from PyPI was extracted. Unwanted
files were removed.
The clang-format ignore list was updated to reflect the new source
of files.
setup.py was updated to pass a new argument to python-zstandard's
function for returning an Extension instance. Upstream had to change
to use relative paths because Python 3.7's packaging doesn't
seem to like absolute paths when defining sources, includes, etc.
The default relative path calculation is relative to the directory of
setup_zstd.py, which is different from the directory of Mercurial's setup.py.
The project contains a vendored copy of zstandard 1.3.6. The old
version was 1.3.4.
The API should be backwards compatible and nothing in core should
need to be adjusted. However, there is a new "chunker" API that we
may find useful in places where we want to emit compressed chunks
of a fixed size.
There is a pair of bug fixes in 0.10.0 with regard to
compressobj() and decompressobj() when block flushing is used. I
actually found these bugs when introducing these APIs in Mercurial!
But existing Mercurial code is not affected because we don't
perform block flushing.
# no-check-commit because 3rd party code has different style guidelines
Differential Revision: https://phab.mercurial-scm.org/D4911
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Mon, 08 Oct 2018 16:27:40 -0700 |
parents | b1fb341d8a61 |
children | 675775c33ab6 |
comparison
equal
deleted
inserted
replaced
40120:89742f1fa6cb | 40121:73fef626dae3 |
---|---|
151 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) | 151 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) |
152 | 152 |
153 no_params = zstd.get_frame_parameters(no_dict_id) | 153 no_params = zstd.get_frame_parameters(no_dict_id) |
154 with_params = zstd.get_frame_parameters(with_dict_id) | 154 with_params = zstd.get_frame_parameters(with_dict_id) |
155 self.assertEqual(no_params.dict_id, 0) | 155 self.assertEqual(no_params.dict_id, 0) |
156 self.assertEqual(with_params.dict_id, 1387616518) | 156 self.assertEqual(with_params.dict_id, 1880053135) |
157 | 157 |
158 def test_compress_dict_multiple(self): | 158 def test_compress_dict_multiple(self): |
159 samples = [] | 159 samples = [] |
160 for i in range(128): | 160 for i in range(128): |
161 samples.append(b'foo' * 64) | 161 samples.append(b'foo' * 64) |
214 params = zstd.get_frame_parameters(result); | 214 params = zstd.get_frame_parameters(result); |
215 self.assertEqual(params.content_size, 3); | 215 self.assertEqual(params.content_size, 3); |
216 self.assertEqual(params.dict_id, d.dict_id()) | 216 self.assertEqual(params.dict_id, d.dict_id()) |
217 | 217 |
218 self.assertEqual(result, | 218 self.assertEqual(result, |
219 b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00' | 219 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00' |
220 b'\x66\x6f\x6f') | 220 b'\x66\x6f\x6f') |
221 | 221 |
222 def test_multithreaded_compression_params(self): | 222 def test_multithreaded_compression_params(self): |
223 params = zstd.ZstdCompressionParameters.from_level(0, threads=2) | 223 params = zstd.ZstdCompressionParameters.from_level(0, threads=2) |
224 cctx = zstd.ZstdCompressor(compression_params=params) | 224 cctx = zstd.ZstdCompressor(compression_params=params) |
334 self.assertEqual(cobj.compress(b'foo'), b'') | 334 self.assertEqual(cobj.compress(b'foo'), b'') |
335 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | 335 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), |
336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') | 336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') |
337 self.assertEqual(cobj.compress(b'bar'), b'') | 337 self.assertEqual(cobj.compress(b'bar'), b'') |
338 # 3 byte header plus content. | 338 # 3 byte header plus content. |
339 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar') | 339 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), |
340 b'\x18\x00\x00bar') | |
341 self.assertEqual(cobj.flush(), b'\x01\x00\x00') | |
340 | 342 |
341 def test_flush_empty_block(self): | 343 def test_flush_empty_block(self): |
342 cctx = zstd.ZstdCompressor(write_checksum=True) | 344 cctx = zstd.ZstdCompressor(write_checksum=True) |
343 cobj = cctx.compressobj() | 345 cobj = cctx.compressobj() |
344 | 346 |
574 @make_cffi | 576 @make_cffi |
575 class TestCompressor_stream_reader(unittest.TestCase): | 577 class TestCompressor_stream_reader(unittest.TestCase): |
576 def test_context_manager(self): | 578 def test_context_manager(self): |
577 cctx = zstd.ZstdCompressor() | 579 cctx = zstd.ZstdCompressor() |
578 | 580 |
579 reader = cctx.stream_reader(b'foo' * 60) | |
580 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | |
581 reader.read(10) | |
582 | |
583 with cctx.stream_reader(b'foo') as reader: | 581 with cctx.stream_reader(b'foo') as reader: |
584 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): | 582 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): |
585 with reader as reader2: | 583 with reader as reader2: |
586 pass | 584 pass |
585 | |
586 def test_no_context_manager(self): | |
587 cctx = zstd.ZstdCompressor() | |
588 | |
589 reader = cctx.stream_reader(b'foo') | |
590 reader.read(4) | |
591 self.assertFalse(reader.closed) | |
592 | |
593 reader.close() | |
594 self.assertTrue(reader.closed) | |
595 with self.assertRaisesRegexp(ValueError, 'stream is closed'): | |
596 reader.read(1) | |
587 | 597 |
588 def test_not_implemented(self): | 598 def test_not_implemented(self): |
589 cctx = zstd.ZstdCompressor() | 599 cctx = zstd.ZstdCompressor() |
590 | 600 |
591 with cctx.stream_reader(b'foo' * 60) as reader: | 601 with cctx.stream_reader(b'foo' * 60) as reader: |
617 with cctx.stream_reader(b'boo') as reader: | 627 with cctx.stream_reader(b'boo') as reader: |
618 self.assertTrue(reader.readable()) | 628 self.assertTrue(reader.readable()) |
619 self.assertFalse(reader.writable()) | 629 self.assertFalse(reader.writable()) |
620 self.assertFalse(reader.seekable()) | 630 self.assertFalse(reader.seekable()) |
621 self.assertFalse(reader.isatty()) | 631 self.assertFalse(reader.isatty()) |
632 self.assertFalse(reader.closed) | |
622 self.assertIsNone(reader.flush()) | 633 self.assertIsNone(reader.flush()) |
634 self.assertFalse(reader.closed) | |
635 | |
636 self.assertTrue(reader.closed) | |
623 | 637 |
624 def test_read_closed(self): | 638 def test_read_closed(self): |
625 cctx = zstd.ZstdCompressor() | 639 cctx = zstd.ZstdCompressor() |
626 | 640 |
627 with cctx.stream_reader(b'foo' * 60) as reader: | 641 with cctx.stream_reader(b'foo' * 60) as reader: |
628 reader.close() | 642 reader.close() |
643 self.assertTrue(reader.closed) | |
629 with self.assertRaisesRegexp(ValueError, 'stream is closed'): | 644 with self.assertRaisesRegexp(ValueError, 'stream is closed'): |
630 reader.read(10) | 645 reader.read(10) |
631 | 646 |
632 def test_read_bad_size(self): | 647 def test_read_bad_size(self): |
633 cctx = zstd.ZstdCompressor() | 648 cctx = zstd.ZstdCompressor() |
713 | 728 |
714 with cctx.stream_reader(b'foo' * 60) as reader: | 729 with cctx.stream_reader(b'foo' * 60) as reader: |
715 while reader.read(8192): | 730 while reader.read(8192): |
716 pass | 731 pass |
717 | 732 |
718 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): | 733 with self.assertRaisesRegexp(ValueError, 'stream is closed'): |
719 reader.read(10) | 734 reader.read(10) |
720 | 735 |
721 def test_bad_size(self): | 736 def test_bad_size(self): |
722 cctx = zstd.ZstdCompressor() | 737 cctx = zstd.ZstdCompressor() |
723 | 738 |
790 samples.append(b'foobar' * 64) | 805 samples.append(b'foobar' * 64) |
791 | 806 |
792 d = zstd.train_dictionary(8192, samples) | 807 d = zstd.train_dictionary(8192, samples) |
793 | 808 |
794 h = hashlib.sha1(d.as_bytes()).hexdigest() | 809 h = hashlib.sha1(d.as_bytes()).hexdigest() |
795 self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027') | 810 self.assertEqual(h, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79') |
796 | 811 |
797 buffer = io.BytesIO() | 812 buffer = io.BytesIO() |
798 cctx = zstd.ZstdCompressor(level=9, dict_data=d) | 813 cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
799 with cctx.stream_writer(buffer) as compressor: | 814 with cctx.stream_writer(buffer) as compressor: |
800 self.assertEqual(compressor.write(b'foo'), 0) | 815 self.assertEqual(compressor.write(b'foo'), 0) |
806 params = zstd.get_frame_parameters(compressed) | 821 params = zstd.get_frame_parameters(compressed) |
807 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | 822 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
808 self.assertEqual(params.window_size, 2097152) | 823 self.assertEqual(params.window_size, 2097152) |
809 self.assertEqual(params.dict_id, d.dict_id()) | 824 self.assertEqual(params.dict_id, d.dict_id()) |
810 self.assertFalse(params.has_checksum) | 825 self.assertFalse(params.has_checksum) |
811 self.assertEqual(compressed, | 826 |
812 b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00' | 827 h = hashlib.sha1(compressed).hexdigest() |
813 b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89') | 828 self.assertEqual(h, '23f88344263678478f5f82298e0a5d1833125786') |
829 | |
830 source = b'foo' + b'bar' + (b'foo' * 16384) | |
831 | |
832 dctx = zstd.ZstdDecompressor(dict_data=d) | |
833 | |
834 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)), | |
835 source) | |
814 | 836 |
815 def test_compression_params(self): | 837 def test_compression_params(self): |
816 params = zstd.ZstdCompressionParameters( | 838 params = zstd.ZstdCompressionParameters( |
817 window_log=20, | 839 window_log=20, |
818 chain_log=6, | 840 chain_log=6, |
1155 | 1177 |
1156 # Test another operation on errored compressor. | 1178 # Test another operation on errored compressor. |
1157 b''.join(cctx.read_to_iter(source)) | 1179 b''.join(cctx.read_to_iter(source)) |
1158 | 1180 |
1159 | 1181 |
1182 @make_cffi | |
1183 class TestCompressor_chunker(unittest.TestCase): | |
1184 def test_empty(self): | |
1185 cctx = zstd.ZstdCompressor(write_content_size=False) | |
1186 chunker = cctx.chunker() | |
1187 | |
1188 it = chunker.compress(b'') | |
1189 | |
1190 with self.assertRaises(StopIteration): | |
1191 next(it) | |
1192 | |
1193 it = chunker.finish() | |
1194 | |
1195 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x50\x01\x00\x00') | |
1196 | |
1197 with self.assertRaises(StopIteration): | |
1198 next(it) | |
1199 | |
1200 def test_simple_input(self): | |
1201 cctx = zstd.ZstdCompressor() | |
1202 chunker = cctx.chunker() | |
1203 | |
1204 it = chunker.compress(b'foobar') | |
1205 | |
1206 with self.assertRaises(StopIteration): | |
1207 next(it) | |
1208 | |
1209 it = chunker.compress(b'baz' * 30) | |
1210 | |
1211 with self.assertRaises(StopIteration): | |
1212 next(it) | |
1213 | |
1214 it = chunker.finish() | |
1215 | |
1216 self.assertEqual(next(it), | |
1217 b'\x28\xb5\x2f\xfd\x00\x50\x7d\x00\x00\x48\x66\x6f' | |
1218 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e') | |
1219 | |
1220 with self.assertRaises(StopIteration): | |
1221 next(it) | |
1222 | |
1223 def test_input_size(self): | |
1224 cctx = zstd.ZstdCompressor() | |
1225 chunker = cctx.chunker(size=1024) | |
1226 | |
1227 it = chunker.compress(b'x' * 1000) | |
1228 | |
1229 with self.assertRaises(StopIteration): | |
1230 next(it) | |
1231 | |
1232 it = chunker.compress(b'y' * 24) | |
1233 | |
1234 with self.assertRaises(StopIteration): | |
1235 next(it) | |
1236 | |
1237 chunks = list(chunker.finish()) | |
1238 | |
1239 self.assertEqual(chunks, [ | |
1240 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00' | |
1241 b'\xa0\x16\xe3\x2b\x80\x05' | |
1242 ]) | |
1243 | |
1244 dctx = zstd.ZstdDecompressor() | |
1245 | |
1246 self.assertEqual(dctx.decompress(b''.join(chunks)), | |
1247 (b'x' * 1000) + (b'y' * 24)) | |
1248 | |
1249 def test_small_chunk_size(self): | |
1250 cctx = zstd.ZstdCompressor() | |
1251 chunker = cctx.chunker(chunk_size=1) | |
1252 | |
1253 chunks = list(chunker.compress(b'foo' * 1024)) | |
1254 self.assertEqual(chunks, []) | |
1255 | |
1256 chunks = list(chunker.finish()) | |
1257 self.assertTrue(all(len(chunk) == 1 for chunk in chunks)) | |
1258 | |
1259 self.assertEqual( | |
1260 b''.join(chunks), | |
1261 b'\x28\xb5\x2f\xfd\x00\x50\x55\x00\x00\x18\x66\x6f\x6f\x01\x00' | |
1262 b'\xfa\xd3\x77\x43') | |
1263 | |
1264 dctx = zstd.ZstdDecompressor() | |
1265 self.assertEqual(dctx.decompress(b''.join(chunks), | |
1266 max_output_size=10000), | |
1267 b'foo' * 1024) | |
1268 | |
1269 def test_input_types(self): | |
1270 cctx = zstd.ZstdCompressor() | |
1271 | |
1272 mutable_array = bytearray(3) | |
1273 mutable_array[:] = b'foo' | |
1274 | |
1275 sources = [ | |
1276 memoryview(b'foo'), | |
1277 bytearray(b'foo'), | |
1278 mutable_array, | |
1279 ] | |
1280 | |
1281 for source in sources: | |
1282 chunker = cctx.chunker() | |
1283 | |
1284 self.assertEqual(list(chunker.compress(source)), []) | |
1285 self.assertEqual(list(chunker.finish()), [ | |
1286 b'\x28\xb5\x2f\xfd\x00\x50\x19\x00\x00\x66\x6f\x6f' | |
1287 ]) | |
1288 | |
1289 def test_flush(self): | |
1290 cctx = zstd.ZstdCompressor() | |
1291 chunker = cctx.chunker() | |
1292 | |
1293 self.assertEqual(list(chunker.compress(b'foo' * 1024)), []) | |
1294 self.assertEqual(list(chunker.compress(b'bar' * 1024)), []) | |
1295 | |
1296 chunks1 = list(chunker.flush()) | |
1297 | |
1298 self.assertEqual(chunks1, [ | |
1299 b'\x28\xb5\x2f\xfd\x00\x50\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72' | |
1300 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02' | |
1301 ]) | |
1302 | |
1303 self.assertEqual(list(chunker.flush()), []) | |
1304 self.assertEqual(list(chunker.flush()), []) | |
1305 | |
1306 self.assertEqual(list(chunker.compress(b'baz' * 1024)), []) | |
1307 | |
1308 chunks2 = list(chunker.flush()) | |
1309 self.assertEqual(len(chunks2), 1) | |
1310 | |
1311 chunks3 = list(chunker.finish()) | |
1312 self.assertEqual(len(chunks2), 1) | |
1313 | |
1314 dctx = zstd.ZstdDecompressor() | |
1315 | |
1316 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3), | |
1317 max_output_size=10000), | |
1318 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024)) | |
1319 | |
1320 def test_compress_after_finish(self): | |
1321 cctx = zstd.ZstdCompressor() | |
1322 chunker = cctx.chunker() | |
1323 | |
1324 list(chunker.compress(b'foo')) | |
1325 list(chunker.finish()) | |
1326 | |
1327 with self.assertRaisesRegexp( | |
1328 zstd.ZstdError, | |
1329 'cannot call compress\(\) after compression finished'): | |
1330 list(chunker.compress(b'foo')) | |
1331 | |
1332 def test_flush_after_finish(self): | |
1333 cctx = zstd.ZstdCompressor() | |
1334 chunker = cctx.chunker() | |
1335 | |
1336 list(chunker.compress(b'foo')) | |
1337 list(chunker.finish()) | |
1338 | |
1339 with self.assertRaisesRegexp( | |
1340 zstd.ZstdError, | |
1341 'cannot call flush\(\) after compression finished'): | |
1342 list(chunker.flush()) | |
1343 | |
1344 def test_finish_after_finish(self): | |
1345 cctx = zstd.ZstdCompressor() | |
1346 chunker = cctx.chunker() | |
1347 | |
1348 list(chunker.compress(b'foo')) | |
1349 list(chunker.finish()) | |
1350 | |
1351 with self.assertRaisesRegexp( | |
1352 zstd.ZstdError, | |
1353 'cannot call finish\(\) after compression finished'): | |
1354 list(chunker.finish()) | |
1355 | |
1356 | |
1160 class TestCompressor_multi_compress_to_buffer(unittest.TestCase): | 1357 class TestCompressor_multi_compress_to_buffer(unittest.TestCase): |
1161 def test_invalid_inputs(self): | 1358 def test_invalid_inputs(self): |
1162 cctx = zstd.ZstdCompressor() | 1359 cctx = zstd.ZstdCompressor() |
1163 | 1360 |
1164 with self.assertRaises(TypeError): | 1361 with self.assertRaises(TypeError): |