comparison contrib/python-zstandard/tests/test_compressor_fuzzing.py @ 40121:73fef626dae3

zstandard: vendor python-zstandard 0.10.1 This was just released. The upstream source distribution from PyPI was extracted. Unwanted files were removed. The clang-format ignore list was updated to reflect the new source of files. setup.py was updated to pass a new argument to python-zstandard's function for returning an Extension instance. Upstream had to change to use relative paths because Python 3.7's packaging doesn't seem to like absolute paths when defining sources, includes, etc. The default relative path calculation is relative to setup_zstd.py, which is different from the directory of Mercurial's setup.py. The project contains a vendored copy of zstandard 1.3.6. The old version was 1.3.4. The API should be backwards compatible and nothing in core should need to be adjusted. However, there is a new "chunker" API that we may find useful in places where we want to emit compressed chunks of a fixed size. There is a pair of bug fixes in 0.10.0 with regard to compressobj() and decompressobj() when block flushing is used. I actually found these bugs when introducing these APIs in Mercurial! But existing Mercurial code is not affected because we don't perform block flushing. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D4911
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 08 Oct 2018 16:27:40 -0700
parents b1fb341d8a61
children 675775c33ab6
comparison
equal deleted inserted replaced
40120:89742f1fa6cb 40121:73fef626dae3
133 133
134 chunks.append(cobj.flush()) 134 chunks.append(cobj.flush())
135 135
136 self.assertEqual(b''.join(chunks), ref_frame) 136 self.assertEqual(b''.join(chunks), ref_frame)
137 137
138 @hypothesis.settings(
139 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
140 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
141 level=strategies.integers(min_value=1, max_value=5),
142 chunk_sizes=strategies.data(),
143 flushes=strategies.data())
144 def test_flush_block(self, original, level, chunk_sizes, flushes):
145 cctx = zstd.ZstdCompressor(level=level)
146 cobj = cctx.compressobj()
147
148 dctx = zstd.ZstdDecompressor()
149 dobj = dctx.decompressobj()
150
151 compressed_chunks = []
152 decompressed_chunks = []
153 i = 0
154 while True:
155 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
156 source = original[i:i + input_size]
157 if not source:
158 break
159
160 i += input_size
161
162 chunk = cobj.compress(source)
163 compressed_chunks.append(chunk)
164 decompressed_chunks.append(dobj.decompress(chunk))
165
166 if not flushes.draw(strategies.booleans()):
167 continue
168
169 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
170 compressed_chunks.append(chunk)
171 decompressed_chunks.append(dobj.decompress(chunk))
172
173 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
174
175 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
176 compressed_chunks.append(chunk)
177 decompressed_chunks.append(dobj.decompress(chunk))
178
179 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
180 max_output_size=len(original)),
181 original)
182 self.assertEqual(b''.join(decompressed_chunks), original)
138 183
139 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') 184 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
140 @make_cffi 185 @make_cffi
141 class TestCompressor_read_to_iter_fuzzing(unittest.TestCase): 186 class TestCompressor_read_to_iter_fuzzing(unittest.TestCase):
142 @hypothesis.given(original=strategies.sampled_from(random_input_data()), 187 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
184 # verify the decompressed data matches the input. 229 # verify the decompressed data matches the input.
185 dctx = zstd.ZstdDecompressor(**kwargs) 230 dctx = zstd.ZstdDecompressor(**kwargs)
186 231
187 for i, frame in enumerate(result): 232 for i, frame in enumerate(result):
188 self.assertEqual(dctx.decompress(frame), original[i]) 233 self.assertEqual(dctx.decompress(frame), original[i])
234
235
236 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
237 @make_cffi
238 class TestCompressor_chunker_fuzzing(unittest.TestCase):
239 @hypothesis.settings(
240 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
241 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
242 level=strategies.integers(min_value=1, max_value=5),
243 chunk_size=strategies.integers(
244 min_value=1,
245 max_value=32 * 1048576),
246 input_sizes=strategies.data())
247 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
248 cctx = zstd.ZstdCompressor(level=level)
249 chunker = cctx.chunker(chunk_size=chunk_size)
250
251 chunks = []
252 i = 0
253 while True:
254 input_size = input_sizes.draw(strategies.integers(1, 4096))
255 source = original[i:i + input_size]
256 if not source:
257 break
258
259 chunks.extend(chunker.compress(source))
260 i += input_size
261
262 chunks.extend(chunker.finish())
263
264 dctx = zstd.ZstdDecompressor()
265
266 self.assertEqual(dctx.decompress(b''.join(chunks),
267 max_output_size=len(original)),
268 original)
269
270 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
271
272 @hypothesis.settings(
273 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
274 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
275 level=strategies.integers(min_value=1, max_value=5),
276 chunk_size=strategies.integers(
277 min_value=1,
278 max_value=32 * 1048576),
279 input_sizes=strategies.data(),
280 flushes=strategies.data())
281 def test_flush_block(self, original, level, chunk_size, input_sizes,
282 flushes):
283 cctx = zstd.ZstdCompressor(level=level)
284 chunker = cctx.chunker(chunk_size=chunk_size)
285
286 dctx = zstd.ZstdDecompressor()
287 dobj = dctx.decompressobj()
288
289 compressed_chunks = []
290 decompressed_chunks = []
291 i = 0
292 while True:
293 input_size = input_sizes.draw(strategies.integers(1, 4096))
294 source = original[i:i + input_size]
295 if not source:
296 break
297
298 i += input_size
299
300 chunks = list(chunker.compress(source))
301 compressed_chunks.extend(chunks)
302 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
303
304 if not flushes.draw(strategies.booleans()):
305 continue
306
307 chunks = list(chunker.flush())
308 compressed_chunks.extend(chunks)
309 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
310
311 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
312
313 chunks = list(chunker.finish())
314 compressed_chunks.extend(chunks)
315 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
316
317 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
318 max_output_size=len(original)),
319 original)
320 self.assertEqual(b''.join(decompressed_chunks), original)