diff contrib/python-zstandard/tests/test_decompressor_fuzzing.py @ 42070:675775c33ab6

zstandard: vendor python-zstandard 0.11 The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. The project contains a vendored copy of zstandard 1.3.8. The old version was 1.3.6. This should result in some minor performance wins. test-check-py3-compat.t was updated to reflect now-passing tests on Python 3.8. Some HTTP tests were updated to reflect new zstd compression output. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D6199
author Gregory Szorc <gregory.szorc@gmail.com>
date Thu, 04 Apr 2019 17:34:43 -0700
parents b1fb341d8a61
children de7838053207
line wrap: on
line diff
--- a/contrib/python-zstandard/tests/test_decompressor_fuzzing.py	Thu Apr 04 15:24:03 2019 -0700
+++ b/contrib/python-zstandard/tests/test_decompressor_fuzzing.py	Thu Apr 04 17:34:43 2019 -0700
@@ -12,6 +12,7 @@
 
 from . common import (
     make_cffi,
+    NonClosingBytesIO,
     random_input_data,
 )
 
@@ -23,22 +24,200 @@
         suppress_health_check=[hypothesis.HealthCheck.large_base_example])
     @hypothesis.given(original=strategies.sampled_from(random_input_data()),
                       level=strategies.integers(min_value=1, max_value=5),
-                      source_read_size=strategies.integers(1, 16384),
+                      streaming=strategies.booleans(),
+                      source_read_size=strategies.integers(1, 1048576),
                       read_sizes=strategies.data())
-    def test_stream_source_read_variance(self, original, level, source_read_size,
-                                         read_sizes):
+    def test_stream_source_read_variance(self, original, level, streaming,
+                                         source_read_size, read_sizes):
         cctx = zstd.ZstdCompressor(level=level)
-        frame = cctx.compress(original)
+
+        if streaming:
+            source = io.BytesIO()
+            writer = cctx.stream_writer(source)
+            writer.write(original)
+            writer.flush(zstd.FLUSH_FRAME)
+            source.seek(0)
+        else:
+            frame = cctx.compress(original)
+            source = io.BytesIO(frame)
 
         dctx = zstd.ZstdDecompressor()
-        source = io.BytesIO(frame)
 
         chunks = []
         with dctx.stream_reader(source, read_size=source_read_size) as reader:
             while True:
-                read_size = read_sizes.draw(strategies.integers(1, 16384))
+                read_size = read_sizes.draw(strategies.integers(-1, 131072))
+                chunk = reader.read(read_size)
+                if not chunk and read_size:
+                    break
+
+                chunks.append(chunk)
+
+        self.assertEqual(b''.join(chunks), original)
+
+    # Similar to above except we have a constant read() size.
+    @hypothesis.settings(
+        suppress_health_check=[hypothesis.HealthCheck.large_base_example])
+    @hypothesis.given(original=strategies.sampled_from(random_input_data()),
+                      level=strategies.integers(min_value=1, max_value=5),
+                      streaming=strategies.booleans(),
+                      source_read_size=strategies.integers(1, 1048576),
+                      read_size=strategies.integers(-1, 131072))
+    def test_stream_source_read_size(self, original, level, streaming,
+                                     source_read_size, read_size):
+        if read_size == 0:
+            read_size = 1
+
+        cctx = zstd.ZstdCompressor(level=level)
+
+        if streaming:
+            source = io.BytesIO()
+            writer = cctx.stream_writer(source)
+            writer.write(original)
+            writer.flush(zstd.FLUSH_FRAME)
+            source.seek(0)
+        else:
+            frame = cctx.compress(original)
+            source = io.BytesIO(frame)
+
+        dctx = zstd.ZstdDecompressor()
+
+        chunks = []
+        reader = dctx.stream_reader(source, read_size=source_read_size)
+        while True:
+            chunk = reader.read(read_size)
+            if not chunk and read_size:
+                break
+
+            chunks.append(chunk)
+
+        self.assertEqual(b''.join(chunks), original)
+
+    @hypothesis.settings(
+        suppress_health_check=[hypothesis.HealthCheck.large_base_example])
+    @hypothesis.given(original=strategies.sampled_from(random_input_data()),
+                      level=strategies.integers(min_value=1, max_value=5),
+                      streaming=strategies.booleans(),
+                      source_read_size=strategies.integers(1, 1048576),
+                      read_sizes=strategies.data())
+    def test_buffer_source_read_variance(self, original, level, streaming,
+                                         source_read_size, read_sizes):
+        cctx = zstd.ZstdCompressor(level=level)
+
+        if streaming:
+            source = io.BytesIO()
+            writer = cctx.stream_writer(source)
+            writer.write(original)
+            writer.flush(zstd.FLUSH_FRAME)
+            frame = source.getvalue()
+        else:
+            frame = cctx.compress(original)
+
+        dctx = zstd.ZstdDecompressor()
+        chunks = []
+
+        with dctx.stream_reader(frame, read_size=source_read_size) as reader:
+            while True:
+                read_size = read_sizes.draw(strategies.integers(-1, 131072))
                 chunk = reader.read(read_size)
-                if not chunk:
+                if not chunk and read_size:
+                    break
+
+                chunks.append(chunk)
+
+        self.assertEqual(b''.join(chunks), original)
+
+    # Similar to above except we have a constant read() size.
+    @hypothesis.settings(
+        suppress_health_check=[hypothesis.HealthCheck.large_base_example])
+    @hypothesis.given(original=strategies.sampled_from(random_input_data()),
+                      level=strategies.integers(min_value=1, max_value=5),
+                      streaming=strategies.booleans(),
+                      source_read_size=strategies.integers(1, 1048576),
+                      read_size=strategies.integers(-1, 131072))
+    def test_buffer_source_constant_read_size(self, original, level, streaming,
+                                              source_read_size, read_size):
+        if read_size == 0:
+            read_size = -1
+
+        cctx = zstd.ZstdCompressor(level=level)
+
+        if streaming:
+            source = io.BytesIO()
+            writer = cctx.stream_writer(source)
+            writer.write(original)
+            writer.flush(zstd.FLUSH_FRAME)
+            frame = source.getvalue()
+        else:
+            frame = cctx.compress(original)
+
+        dctx = zstd.ZstdDecompressor()
+        chunks = []
+
+        reader = dctx.stream_reader(frame, read_size=source_read_size)
+        while True:
+            chunk = reader.read(read_size)
+            if not chunk and read_size:
+                break
+
+            chunks.append(chunk)
+
+        self.assertEqual(b''.join(chunks), original)
+
+    @hypothesis.settings(
+        suppress_health_check=[hypothesis.HealthCheck.large_base_example])
+    @hypothesis.given(original=strategies.sampled_from(random_input_data()),
+                      level=strategies.integers(min_value=1, max_value=5),
+                      streaming=strategies.booleans(),
+                      source_read_size=strategies.integers(1, 1048576))
+    def test_stream_source_readall(self, original, level, streaming,
+                                         source_read_size):
+        cctx = zstd.ZstdCompressor(level=level)
+
+        if streaming:
+            source = io.BytesIO()
+            writer = cctx.stream_writer(source)
+            writer.write(original)
+            writer.flush(zstd.FLUSH_FRAME)
+            source.seek(0)
+        else:
+            frame = cctx.compress(original)
+            source = io.BytesIO(frame)
+
+        dctx = zstd.ZstdDecompressor()
+
+        data = dctx.stream_reader(source, read_size=source_read_size).readall()
+        self.assertEqual(data, original)
+
+    @hypothesis.settings(
+        suppress_health_check=[hypothesis.HealthCheck.large_base_example])
+    @hypothesis.given(original=strategies.sampled_from(random_input_data()),
+                      level=strategies.integers(min_value=1, max_value=5),
+                      streaming=strategies.booleans(),
+                      source_read_size=strategies.integers(1, 1048576),
+                      read_sizes=strategies.data())
+    def test_stream_source_read1_variance(self, original, level, streaming,
+                                          source_read_size, read_sizes):
+        cctx = zstd.ZstdCompressor(level=level)
+
+        if streaming:
+            source = io.BytesIO()
+            writer = cctx.stream_writer(source)
+            writer.write(original)
+            writer.flush(zstd.FLUSH_FRAME)
+            source.seek(0)
+        else:
+            frame = cctx.compress(original)
+            source = io.BytesIO(frame)
+
+        dctx = zstd.ZstdDecompressor()
+
+        chunks = []
+        with dctx.stream_reader(source, read_size=source_read_size) as reader:
+            while True:
+                read_size = read_sizes.draw(strategies.integers(-1, 131072))
+                chunk = reader.read1(read_size)
+                if not chunk and read_size:
                     break
 
                 chunks.append(chunk)
@@ -49,24 +228,36 @@
         suppress_health_check=[hypothesis.HealthCheck.large_base_example])
     @hypothesis.given(original=strategies.sampled_from(random_input_data()),
                       level=strategies.integers(min_value=1, max_value=5),
-                      source_read_size=strategies.integers(1, 16384),
+                      streaming=strategies.booleans(),
+                      source_read_size=strategies.integers(1, 1048576),
                       read_sizes=strategies.data())
-    def test_buffer_source_read_variance(self, original, level, source_read_size,
-                                         read_sizes):
+    def test_stream_source_readinto1_variance(self, original, level, streaming,
+                                          source_read_size, read_sizes):
         cctx = zstd.ZstdCompressor(level=level)
-        frame = cctx.compress(original)
+
+        if streaming:
+            source = io.BytesIO()
+            writer = cctx.stream_writer(source)
+            writer.write(original)
+            writer.flush(zstd.FLUSH_FRAME)
+            source.seek(0)
+        else:
+            frame = cctx.compress(original)
+            source = io.BytesIO(frame)
 
         dctx = zstd.ZstdDecompressor()
+
         chunks = []
-
-        with dctx.stream_reader(frame, read_size=source_read_size) as reader:
+        with dctx.stream_reader(source, read_size=source_read_size) as reader:
             while True:
-                read_size = read_sizes.draw(strategies.integers(1, 16384))
-                chunk = reader.read(read_size)
-                if not chunk:
+                read_size = read_sizes.draw(strategies.integers(1, 131072))
+                b = bytearray(read_size)
+                count = reader.readinto1(b)
+
+                if not count:
                     break
 
-                chunks.append(chunk)
+                chunks.append(bytes(b[0:count]))
 
         self.assertEqual(b''.join(chunks), original)
 
@@ -75,7 +266,7 @@
     @hypothesis.given(
         original=strategies.sampled_from(random_input_data()),
         level=strategies.integers(min_value=1, max_value=5),
-        source_read_size=strategies.integers(1, 16384),
+        source_read_size=strategies.integers(1, 1048576),
         seek_amounts=strategies.data(),
         read_sizes=strategies.data())
     def test_relative_seeks(self, original, level, source_read_size, seek_amounts,
@@ -99,6 +290,46 @@
 
                 self.assertEqual(original[offset:offset + len(chunk)], chunk)
 
+    @hypothesis.settings(
+        suppress_health_check=[hypothesis.HealthCheck.large_base_example])
+    @hypothesis.given(
+        originals=strategies.data(),
+        frame_count=strategies.integers(min_value=2, max_value=10),
+        level=strategies.integers(min_value=1, max_value=5),
+        source_read_size=strategies.integers(1, 1048576),
+        read_sizes=strategies.data())
+    def test_multiple_frames(self, originals, frame_count, level,
+                             source_read_size, read_sizes):
+
+        cctx = zstd.ZstdCompressor(level=level)
+        source = io.BytesIO()
+        buffer = io.BytesIO()
+        writer = cctx.stream_writer(buffer)
+
+        for i in range(frame_count):
+            data = originals.draw(strategies.sampled_from(random_input_data()))
+            source.write(data)
+            writer.write(data)
+            writer.flush(zstd.FLUSH_FRAME)
+
+        dctx = zstd.ZstdDecompressor()
+        buffer.seek(0)
+        reader = dctx.stream_reader(buffer, read_size=source_read_size,
+                                    read_across_frames=True)
+
+        chunks = []
+
+        while True:
+            read_amount = read_sizes.draw(strategies.integers(-1, 16384))
+            chunk = reader.read(read_amount)
+
+            if not chunk and read_amount:
+                break
+
+            chunks.append(chunk)
+
+        self.assertEqual(source.getvalue(), b''.join(chunks))
+
 
 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
 @make_cffi
@@ -113,7 +344,7 @@
 
         dctx = zstd.ZstdDecompressor()
         source = io.BytesIO(frame)
-        dest = io.BytesIO()
+        dest = NonClosingBytesIO()
 
         with dctx.stream_writer(dest, write_size=write_size) as decompressor:
             while True:
@@ -234,10 +465,12 @@
                                    write_checksum=True,
                                    **kwargs)
 
+        if not hasattr(cctx, 'multi_compress_to_buffer'):
+            self.skipTest('multi_compress_to_buffer not available')
+
         frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
 
         dctx = zstd.ZstdDecompressor(**kwargs)
-
         result = dctx.multi_decompress_to_buffer(frames_buffer)
 
         self.assertEqual(len(result), len(original))