comparison contrib/python-zstandard/tests/test_compressor.py @ 40121:73fef626dae3

zstandard: vendor python-zstandard 0.10.1 This version was just released. The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. setup.py was updated to pass a new argument to python-zstandard's function for returning an Extension instance. Upstream had to change to use relative paths because Python 3.7's packaging doesn't seem to like absolute paths when defining sources, includes, etc. The default relative path calculation is relative to setup_zstd.py, which is different from the directory of Mercurial's setup.py. The project contains a vendored copy of zstandard 1.3.6. The old version was 1.3.4. The API should be backwards compatible and nothing in core should need to be adjusted. However, there is a new "chunker" API that we may find useful in places where we want to emit compressed chunks of a fixed size. There is a pair of bug fixes in 0.10.0 with regard to compressobj() and decompressobj() when block flushing is used. I actually found these bugs when introducing these APIs in Mercurial! But existing Mercurial code is not affected because we don't perform block flushing. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D4911
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 08 Oct 2018 16:27:40 -0700
parents b1fb341d8a61
children 675775c33ab6
comparison
equal deleted inserted replaced
40120:89742f1fa6cb 40121:73fef626dae3
151 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) 151 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
152 152
153 no_params = zstd.get_frame_parameters(no_dict_id) 153 no_params = zstd.get_frame_parameters(no_dict_id)
154 with_params = zstd.get_frame_parameters(with_dict_id) 154 with_params = zstd.get_frame_parameters(with_dict_id)
155 self.assertEqual(no_params.dict_id, 0) 155 self.assertEqual(no_params.dict_id, 0)
156 self.assertEqual(with_params.dict_id, 1387616518) 156 self.assertEqual(with_params.dict_id, 1880053135)
157 157
158 def test_compress_dict_multiple(self): 158 def test_compress_dict_multiple(self):
159 samples = [] 159 samples = []
160 for i in range(128): 160 for i in range(128):
161 samples.append(b'foo' * 64) 161 samples.append(b'foo' * 64)
214 params = zstd.get_frame_parameters(result); 214 params = zstd.get_frame_parameters(result);
215 self.assertEqual(params.content_size, 3); 215 self.assertEqual(params.content_size, 3);
216 self.assertEqual(params.dict_id, d.dict_id()) 216 self.assertEqual(params.dict_id, d.dict_id())
217 217
218 self.assertEqual(result, 218 self.assertEqual(result,
219 b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00' 219 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
220 b'\x66\x6f\x6f') 220 b'\x66\x6f\x6f')
221 221
222 def test_multithreaded_compression_params(self): 222 def test_multithreaded_compression_params(self):
223 params = zstd.ZstdCompressionParameters.from_level(0, threads=2) 223 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
224 cctx = zstd.ZstdCompressor(compression_params=params) 224 cctx = zstd.ZstdCompressor(compression_params=params)
334 self.assertEqual(cobj.compress(b'foo'), b'') 334 self.assertEqual(cobj.compress(b'foo'), b'')
335 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), 335 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') 336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
337 self.assertEqual(cobj.compress(b'bar'), b'') 337 self.assertEqual(cobj.compress(b'bar'), b'')
338 # 3 byte header plus content. 338 # 3 byte header plus content.
339 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar') 339 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
340 b'\x18\x00\x00bar')
341 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
340 342
341 def test_flush_empty_block(self): 343 def test_flush_empty_block(self):
342 cctx = zstd.ZstdCompressor(write_checksum=True) 344 cctx = zstd.ZstdCompressor(write_checksum=True)
343 cobj = cctx.compressobj() 345 cobj = cctx.compressobj()
344 346
574 @make_cffi 576 @make_cffi
575 class TestCompressor_stream_reader(unittest.TestCase): 577 class TestCompressor_stream_reader(unittest.TestCase):
576 def test_context_manager(self): 578 def test_context_manager(self):
577 cctx = zstd.ZstdCompressor() 579 cctx = zstd.ZstdCompressor()
578 580
579 reader = cctx.stream_reader(b'foo' * 60)
580 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
581 reader.read(10)
582
583 with cctx.stream_reader(b'foo') as reader: 581 with cctx.stream_reader(b'foo') as reader:
584 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'): 582 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
585 with reader as reader2: 583 with reader as reader2:
586 pass 584 pass
585
586 def test_no_context_manager(self):
587 cctx = zstd.ZstdCompressor()
588
589 reader = cctx.stream_reader(b'foo')
590 reader.read(4)
591 self.assertFalse(reader.closed)
592
593 reader.close()
594 self.assertTrue(reader.closed)
595 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
596 reader.read(1)
587 597
588 def test_not_implemented(self): 598 def test_not_implemented(self):
589 cctx = zstd.ZstdCompressor() 599 cctx = zstd.ZstdCompressor()
590 600
591 with cctx.stream_reader(b'foo' * 60) as reader: 601 with cctx.stream_reader(b'foo' * 60) as reader:
617 with cctx.stream_reader(b'boo') as reader: 627 with cctx.stream_reader(b'boo') as reader:
618 self.assertTrue(reader.readable()) 628 self.assertTrue(reader.readable())
619 self.assertFalse(reader.writable()) 629 self.assertFalse(reader.writable())
620 self.assertFalse(reader.seekable()) 630 self.assertFalse(reader.seekable())
621 self.assertFalse(reader.isatty()) 631 self.assertFalse(reader.isatty())
632 self.assertFalse(reader.closed)
622 self.assertIsNone(reader.flush()) 633 self.assertIsNone(reader.flush())
634 self.assertFalse(reader.closed)
635
636 self.assertTrue(reader.closed)
623 637
624 def test_read_closed(self): 638 def test_read_closed(self):
625 cctx = zstd.ZstdCompressor() 639 cctx = zstd.ZstdCompressor()
626 640
627 with cctx.stream_reader(b'foo' * 60) as reader: 641 with cctx.stream_reader(b'foo' * 60) as reader:
628 reader.close() 642 reader.close()
643 self.assertTrue(reader.closed)
629 with self.assertRaisesRegexp(ValueError, 'stream is closed'): 644 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
630 reader.read(10) 645 reader.read(10)
631 646
632 def test_read_bad_size(self): 647 def test_read_bad_size(self):
633 cctx = zstd.ZstdCompressor() 648 cctx = zstd.ZstdCompressor()
713 728
714 with cctx.stream_reader(b'foo' * 60) as reader: 729 with cctx.stream_reader(b'foo' * 60) as reader:
715 while reader.read(8192): 730 while reader.read(8192):
716 pass 731 pass
717 732
718 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'): 733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
719 reader.read(10) 734 reader.read(10)
720 735
721 def test_bad_size(self): 736 def test_bad_size(self):
722 cctx = zstd.ZstdCompressor() 737 cctx = zstd.ZstdCompressor()
723 738
790 samples.append(b'foobar' * 64) 805 samples.append(b'foobar' * 64)
791 806
792 d = zstd.train_dictionary(8192, samples) 807 d = zstd.train_dictionary(8192, samples)
793 808
794 h = hashlib.sha1(d.as_bytes()).hexdigest() 809 h = hashlib.sha1(d.as_bytes()).hexdigest()
795 self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027') 810 self.assertEqual(h, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79')
796 811
797 buffer = io.BytesIO() 812 buffer = io.BytesIO()
798 cctx = zstd.ZstdCompressor(level=9, dict_data=d) 813 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
799 with cctx.stream_writer(buffer) as compressor: 814 with cctx.stream_writer(buffer) as compressor:
800 self.assertEqual(compressor.write(b'foo'), 0) 815 self.assertEqual(compressor.write(b'foo'), 0)
806 params = zstd.get_frame_parameters(compressed) 821 params = zstd.get_frame_parameters(compressed)
807 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) 822 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
808 self.assertEqual(params.window_size, 2097152) 823 self.assertEqual(params.window_size, 2097152)
809 self.assertEqual(params.dict_id, d.dict_id()) 824 self.assertEqual(params.dict_id, d.dict_id())
810 self.assertFalse(params.has_checksum) 825 self.assertFalse(params.has_checksum)
811 self.assertEqual(compressed, 826
812 b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00' 827 h = hashlib.sha1(compressed).hexdigest()
813 b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89') 828 self.assertEqual(h, '23f88344263678478f5f82298e0a5d1833125786')
829
830 source = b'foo' + b'bar' + (b'foo' * 16384)
831
832 dctx = zstd.ZstdDecompressor(dict_data=d)
833
834 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
835 source)
814 836
815 def test_compression_params(self): 837 def test_compression_params(self):
816 params = zstd.ZstdCompressionParameters( 838 params = zstd.ZstdCompressionParameters(
817 window_log=20, 839 window_log=20,
818 chain_log=6, 840 chain_log=6,
1155 1177
1156 # Test another operation on errored compressor. 1178 # Test another operation on errored compressor.
1157 b''.join(cctx.read_to_iter(source)) 1179 b''.join(cctx.read_to_iter(source))
1158 1180
1159 1181
1182 @make_cffi
1183 class TestCompressor_chunker(unittest.TestCase):
1184 def test_empty(self):
1185 cctx = zstd.ZstdCompressor(write_content_size=False)
1186 chunker = cctx.chunker()
1187
1188 it = chunker.compress(b'')
1189
1190 with self.assertRaises(StopIteration):
1191 next(it)
1192
1193 it = chunker.finish()
1194
1195 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x50\x01\x00\x00')
1196
1197 with self.assertRaises(StopIteration):
1198 next(it)
1199
1200 def test_simple_input(self):
1201 cctx = zstd.ZstdCompressor()
1202 chunker = cctx.chunker()
1203
1204 it = chunker.compress(b'foobar')
1205
1206 with self.assertRaises(StopIteration):
1207 next(it)
1208
1209 it = chunker.compress(b'baz' * 30)
1210
1211 with self.assertRaises(StopIteration):
1212 next(it)
1213
1214 it = chunker.finish()
1215
1216 self.assertEqual(next(it),
1217 b'\x28\xb5\x2f\xfd\x00\x50\x7d\x00\x00\x48\x66\x6f'
1218 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1219
1220 with self.assertRaises(StopIteration):
1221 next(it)
1222
1223 def test_input_size(self):
1224 cctx = zstd.ZstdCompressor()
1225 chunker = cctx.chunker(size=1024)
1226
1227 it = chunker.compress(b'x' * 1000)
1228
1229 with self.assertRaises(StopIteration):
1230 next(it)
1231
1232 it = chunker.compress(b'y' * 24)
1233
1234 with self.assertRaises(StopIteration):
1235 next(it)
1236
1237 chunks = list(chunker.finish())
1238
1239 self.assertEqual(chunks, [
1240 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1241 b'\xa0\x16\xe3\x2b\x80\x05'
1242 ])
1243
1244 dctx = zstd.ZstdDecompressor()
1245
1246 self.assertEqual(dctx.decompress(b''.join(chunks)),
1247 (b'x' * 1000) + (b'y' * 24))
1248
1249 def test_small_chunk_size(self):
1250 cctx = zstd.ZstdCompressor()
1251 chunker = cctx.chunker(chunk_size=1)
1252
1253 chunks = list(chunker.compress(b'foo' * 1024))
1254 self.assertEqual(chunks, [])
1255
1256 chunks = list(chunker.finish())
1257 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1258
1259 self.assertEqual(
1260 b''.join(chunks),
1261 b'\x28\xb5\x2f\xfd\x00\x50\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1262 b'\xfa\xd3\x77\x43')
1263
1264 dctx = zstd.ZstdDecompressor()
1265 self.assertEqual(dctx.decompress(b''.join(chunks),
1266 max_output_size=10000),
1267 b'foo' * 1024)
1268
1269 def test_input_types(self):
1270 cctx = zstd.ZstdCompressor()
1271
1272 mutable_array = bytearray(3)
1273 mutable_array[:] = b'foo'
1274
1275 sources = [
1276 memoryview(b'foo'),
1277 bytearray(b'foo'),
1278 mutable_array,
1279 ]
1280
1281 for source in sources:
1282 chunker = cctx.chunker()
1283
1284 self.assertEqual(list(chunker.compress(source)), [])
1285 self.assertEqual(list(chunker.finish()), [
1286 b'\x28\xb5\x2f\xfd\x00\x50\x19\x00\x00\x66\x6f\x6f'
1287 ])
1288
1289 def test_flush(self):
1290 cctx = zstd.ZstdCompressor()
1291 chunker = cctx.chunker()
1292
1293 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1294 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1295
1296 chunks1 = list(chunker.flush())
1297
1298 self.assertEqual(chunks1, [
1299 b'\x28\xb5\x2f\xfd\x00\x50\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1300 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1301 ])
1302
1303 self.assertEqual(list(chunker.flush()), [])
1304 self.assertEqual(list(chunker.flush()), [])
1305
1306 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1307
1308 chunks2 = list(chunker.flush())
1309 self.assertEqual(len(chunks2), 1)
1310
1311 chunks3 = list(chunker.finish())
1312 self.assertEqual(len(chunks2), 1)
1313
1314 dctx = zstd.ZstdDecompressor()
1315
1316 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1317 max_output_size=10000),
1318 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1319
1320 def test_compress_after_finish(self):
1321 cctx = zstd.ZstdCompressor()
1322 chunker = cctx.chunker()
1323
1324 list(chunker.compress(b'foo'))
1325 list(chunker.finish())
1326
1327 with self.assertRaisesRegexp(
1328 zstd.ZstdError,
1329 'cannot call compress\(\) after compression finished'):
1330 list(chunker.compress(b'foo'))
1331
1332 def test_flush_after_finish(self):
1333 cctx = zstd.ZstdCompressor()
1334 chunker = cctx.chunker()
1335
1336 list(chunker.compress(b'foo'))
1337 list(chunker.finish())
1338
1339 with self.assertRaisesRegexp(
1340 zstd.ZstdError,
1341 'cannot call flush\(\) after compression finished'):
1342 list(chunker.flush())
1343
1344 def test_finish_after_finish(self):
1345 cctx = zstd.ZstdCompressor()
1346 chunker = cctx.chunker()
1347
1348 list(chunker.compress(b'foo'))
1349 list(chunker.finish())
1350
1351 with self.assertRaisesRegexp(
1352 zstd.ZstdError,
1353 'cannot call finish\(\) after compression finished'):
1354 list(chunker.finish())
1355
1356
1160 class TestCompressor_multi_compress_to_buffer(unittest.TestCase): 1357 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1161 def test_invalid_inputs(self): 1358 def test_invalid_inputs(self):
1162 cctx = zstd.ZstdCompressor() 1359 cctx = zstd.ZstdCompressor()
1163 1360
1164 with self.assertRaises(TypeError): 1361 with self.assertRaises(TypeError):