comparison contrib/python-zstandard/tests/test_data_structures.py @ 43994:de7838053207

zstandard: vendor python-zstandard 0.13.0

Version 0.13.0 of the package was just released.

It contains an upgraded zstd C library which can result in some performance wins, official support for Python 3.8, and a blackened code base.

There were no meaningful code or functionality changes in this release of python-zstandard: just reformatting and an upgraded zstd library version. So the diff seems much larger than what it is.

Files were added without modifications.

The clang-format-ignorelist file was updated to reflect a new header file in the zstd distribution.

# no-check-commit because 3rd party code has different style guidelines

Differential Revision: https://phab.mercurial-scm.org/D7770
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 28 Dec 2019 09:55:45 -0800
parents 69de49c4e39c
children 5e84a96d865b
comparison
equal deleted inserted replaced
43993:873d0fecb9a3 43994:de7838053207
1 import sys 1 import sys
2 import unittest 2 import unittest
3 3
4 import zstandard as zstd 4 import zstandard as zstd
5 5
6 from . common import ( 6 from .common import (
7 make_cffi, 7 make_cffi,
8 TestCase,
8 ) 9 )
9 10
10 11
11 @make_cffi 12 @make_cffi
12 class TestCompressionParameters(unittest.TestCase): 13 class TestCompressionParameters(TestCase):
13 def test_bounds(self): 14 def test_bounds(self):
14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN, 15 zstd.ZstdCompressionParameters(
15 chain_log=zstd.CHAINLOG_MIN, 16 window_log=zstd.WINDOWLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN, 17 chain_log=zstd.CHAINLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN, 18 hash_log=zstd.HASHLOG_MIN,
18 min_match=zstd.MINMATCH_MIN + 1, 19 search_log=zstd.SEARCHLOG_MIN,
19 target_length=zstd.TARGETLENGTH_MIN, 20 min_match=zstd.MINMATCH_MIN + 1,
20 strategy=zstd.STRATEGY_FAST) 21 target_length=zstd.TARGETLENGTH_MIN,
21 22 strategy=zstd.STRATEGY_FAST,
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX, 23 )
23 chain_log=zstd.CHAINLOG_MAX, 24
24 hash_log=zstd.HASHLOG_MAX, 25 zstd.ZstdCompressionParameters(
25 search_log=zstd.SEARCHLOG_MAX, 26 window_log=zstd.WINDOWLOG_MAX,
26 min_match=zstd.MINMATCH_MAX - 1, 27 chain_log=zstd.CHAINLOG_MAX,
27 target_length=zstd.TARGETLENGTH_MAX, 28 hash_log=zstd.HASHLOG_MAX,
28 strategy=zstd.STRATEGY_BTULTRA2) 29 search_log=zstd.SEARCHLOG_MAX,
30 min_match=zstd.MINMATCH_MAX - 1,
31 target_length=zstd.TARGETLENGTH_MAX,
32 strategy=zstd.STRATEGY_BTULTRA2,
33 )
29 34
30 def test_from_level(self): 35 def test_from_level(self):
31 p = zstd.ZstdCompressionParameters.from_level(1) 36 p = zstd.ZstdCompressionParameters.from_level(1)
32 self.assertIsInstance(p, zstd.CompressionParameters) 37 self.assertIsInstance(p, zstd.CompressionParameters)
33 38
35 40
36 p = zstd.ZstdCompressionParameters.from_level(-4) 41 p = zstd.ZstdCompressionParameters.from_level(-4)
37 self.assertEqual(p.window_log, 19) 42 self.assertEqual(p.window_log, 19)
38 43
39 def test_members(self): 44 def test_members(self):
40 p = zstd.ZstdCompressionParameters(window_log=10, 45 p = zstd.ZstdCompressionParameters(
41 chain_log=6, 46 window_log=10,
42 hash_log=7, 47 chain_log=6,
43 search_log=4, 48 hash_log=7,
44 min_match=5, 49 search_log=4,
45 target_length=8, 50 min_match=5,
46 strategy=1) 51 target_length=8,
52 strategy=1,
53 )
47 self.assertEqual(p.window_log, 10) 54 self.assertEqual(p.window_log, 10)
48 self.assertEqual(p.chain_log, 6) 55 self.assertEqual(p.chain_log, 6)
49 self.assertEqual(p.hash_log, 7) 56 self.assertEqual(p.hash_log, 7)
50 self.assertEqual(p.search_log, 4) 57 self.assertEqual(p.search_log, 4)
51 self.assertEqual(p.min_match, 5) 58 self.assertEqual(p.min_match, 5)
56 self.assertEqual(p.compression_level, 2) 63 self.assertEqual(p.compression_level, 2)
57 64
58 p = zstd.ZstdCompressionParameters(threads=4) 65 p = zstd.ZstdCompressionParameters(threads=4)
59 self.assertEqual(p.threads, 4) 66 self.assertEqual(p.threads, 4)
60 67
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, 68 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6)
62 overlap_log=6)
63 self.assertEqual(p.threads, 2) 69 self.assertEqual(p.threads, 2)
64 self.assertEqual(p.job_size, 1048576) 70 self.assertEqual(p.job_size, 1048576)
65 self.assertEqual(p.overlap_log, 6) 71 self.assertEqual(p.overlap_log, 6)
66 self.assertEqual(p.overlap_size_log, 6) 72 self.assertEqual(p.overlap_size_log, 6)
67 73
89 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) 95 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
90 self.assertEqual(p.ldm_hash_every_log, 8) 96 self.assertEqual(p.ldm_hash_every_log, 8)
91 self.assertEqual(p.ldm_hash_rate_log, 8) 97 self.assertEqual(p.ldm_hash_rate_log, 8)
92 98
93 def test_estimated_compression_context_size(self): 99 def test_estimated_compression_context_size(self):
94 p = zstd.ZstdCompressionParameters(window_log=20, 100 p = zstd.ZstdCompressionParameters(
95 chain_log=16, 101 window_log=20,
96 hash_log=17, 102 chain_log=16,
97 search_log=1, 103 hash_log=17,
98 min_match=5, 104 search_log=1,
99 target_length=16, 105 min_match=5,
100 strategy=zstd.STRATEGY_DFAST) 106 target_length=16,
107 strategy=zstd.STRATEGY_DFAST,
108 )
101 109
102 # 32-bit has slightly different values from 64-bit. 110 # 32-bit has slightly different values from 64-bit.
103 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144, 111 self.assertAlmostEqual(
104 delta=250) 112 p.estimated_compression_context_size(), 1294464, delta=400
113 )
105 114
106 def test_strategy(self): 115 def test_strategy(self):
107 with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'): 116 with self.assertRaisesRegex(
117 ValueError, "cannot specify both compression_strategy"
118 ):
108 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0) 119 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
109 120
110 p = zstd.ZstdCompressionParameters(strategy=2) 121 p = zstd.ZstdCompressionParameters(strategy=2)
111 self.assertEqual(p.compression_strategy, 2) 122 self.assertEqual(p.compression_strategy, 2)
112 123
113 p = zstd.ZstdCompressionParameters(strategy=3) 124 p = zstd.ZstdCompressionParameters(strategy=3)
114 self.assertEqual(p.compression_strategy, 3) 125 self.assertEqual(p.compression_strategy, 3)
115 126
116 def test_ldm_hash_rate_log(self): 127 def test_ldm_hash_rate_log(self):
117 with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'): 128 with self.assertRaisesRegex(
129 ValueError, "cannot specify both ldm_hash_rate_log"
130 ):
118 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4) 131 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
119 132
120 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) 133 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
121 self.assertEqual(p.ldm_hash_every_log, 8) 134 self.assertEqual(p.ldm_hash_every_log, 8)
122 135
123 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16) 136 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
124 self.assertEqual(p.ldm_hash_every_log, 16) 137 self.assertEqual(p.ldm_hash_every_log, 16)
125 138
126 def test_overlap_log(self): 139 def test_overlap_log(self):
127 with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'): 140 with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"):
128 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9) 141 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
129 142
130 p = zstd.ZstdCompressionParameters(overlap_log=2) 143 p = zstd.ZstdCompressionParameters(overlap_log=2)
131 self.assertEqual(p.overlap_log, 2) 144 self.assertEqual(p.overlap_log, 2)
132 self.assertEqual(p.overlap_size_log, 2) 145 self.assertEqual(p.overlap_size_log, 2)
135 self.assertEqual(p.overlap_log, 4) 148 self.assertEqual(p.overlap_log, 4)
136 self.assertEqual(p.overlap_size_log, 4) 149 self.assertEqual(p.overlap_size_log, 4)
137 150
138 151
139 @make_cffi 152 @make_cffi
140 class TestFrameParameters(unittest.TestCase): 153 class TestFrameParameters(TestCase):
141 def test_invalid_type(self): 154 def test_invalid_type(self):
142 with self.assertRaises(TypeError): 155 with self.assertRaises(TypeError):
143 zstd.get_frame_parameters(None) 156 zstd.get_frame_parameters(None)
144 157
145 # Python 3 doesn't appear to convert unicode to Py_buffer. 158 # Python 3 doesn't appear to convert unicode to Py_buffer.
146 if sys.version_info[0] >= 3: 159 if sys.version_info[0] >= 3:
147 with self.assertRaises(TypeError): 160 with self.assertRaises(TypeError):
148 zstd.get_frame_parameters(u'foobarbaz') 161 zstd.get_frame_parameters(u"foobarbaz")
149 else: 162 else:
150 # CPython will convert unicode to Py_buffer. But CFFI won't. 163 # CPython will convert unicode to Py_buffer. But CFFI won't.
151 if zstd.backend == 'cffi': 164 if zstd.backend == "cffi":
152 with self.assertRaises(TypeError): 165 with self.assertRaises(TypeError):
153 zstd.get_frame_parameters(u'foobarbaz') 166 zstd.get_frame_parameters(u"foobarbaz")
154 else: 167 else:
155 with self.assertRaises(zstd.ZstdError): 168 with self.assertRaises(zstd.ZstdError):
156 zstd.get_frame_parameters(u'foobarbaz') 169 zstd.get_frame_parameters(u"foobarbaz")
157 170
158 def test_invalid_input_sizes(self): 171 def test_invalid_input_sizes(self):
159 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): 172 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
160 zstd.get_frame_parameters(b'') 173 zstd.get_frame_parameters(b"")
161 174
162 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): 175 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
163 zstd.get_frame_parameters(zstd.FRAME_HEADER) 176 zstd.get_frame_parameters(zstd.FRAME_HEADER)
164 177
165 def test_invalid_frame(self): 178 def test_invalid_frame(self):
166 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): 179 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
167 zstd.get_frame_parameters(b'foobarbaz') 180 zstd.get_frame_parameters(b"foobarbaz")
168 181
169 def test_attributes(self): 182 def test_attributes(self):
170 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') 183 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00")
171 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) 184 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
172 self.assertEqual(params.window_size, 1024) 185 self.assertEqual(params.window_size, 1024)
173 self.assertEqual(params.dict_id, 0) 186 self.assertEqual(params.dict_id, 0)
174 self.assertFalse(params.has_checksum) 187 self.assertFalse(params.has_checksum)
175 188
176 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. 189 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
177 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') 190 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff")
178 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) 191 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
179 self.assertEqual(params.window_size, 1024) 192 self.assertEqual(params.window_size, 1024)
180 self.assertEqual(params.dict_id, 255) 193 self.assertEqual(params.dict_id, 255)
181 self.assertFalse(params.has_checksum) 194 self.assertFalse(params.has_checksum)
182 195
183 # Lowest 3rd bit indicates if checksum is present. 196 # Lowest 3rd bit indicates if checksum is present.
184 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') 197 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00")
185 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) 198 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
186 self.assertEqual(params.window_size, 1024) 199 self.assertEqual(params.window_size, 1024)
187 self.assertEqual(params.dict_id, 0) 200 self.assertEqual(params.dict_id, 0)
188 self.assertTrue(params.has_checksum) 201 self.assertTrue(params.has_checksum)
189 202
190 # Upper 2 bits indicate content size. 203 # Upper 2 bits indicate content size.
191 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00') 204 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00")
192 self.assertEqual(params.content_size, 511) 205 self.assertEqual(params.content_size, 511)
193 self.assertEqual(params.window_size, 1024) 206 self.assertEqual(params.window_size, 1024)
194 self.assertEqual(params.dict_id, 0) 207 self.assertEqual(params.dict_id, 0)
195 self.assertFalse(params.has_checksum) 208 self.assertFalse(params.has_checksum)
196 209
197 # Window descriptor is 2nd byte after frame header. 210 # Window descriptor is 2nd byte after frame header.
198 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') 211 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40")
199 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) 212 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
200 self.assertEqual(params.window_size, 262144) 213 self.assertEqual(params.window_size, 262144)
201 self.assertEqual(params.dict_id, 0) 214 self.assertEqual(params.dict_id, 0)
202 self.assertFalse(params.has_checksum) 215 self.assertFalse(params.has_checksum)
203 216
204 # Set multiple things. 217 # Set multiple things.
205 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00') 218 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00")
206 self.assertEqual(params.content_size, 272) 219 self.assertEqual(params.content_size, 272)
207 self.assertEqual(params.window_size, 262144) 220 self.assertEqual(params.window_size, 262144)
208 self.assertEqual(params.dict_id, 15) 221 self.assertEqual(params.dict_id, 15)
209 self.assertTrue(params.has_checksum) 222 self.assertTrue(params.has_checksum)
210 223
211 def test_input_types(self): 224 def test_input_types(self):
212 v = zstd.FRAME_HEADER + b'\x00\x00' 225 v = zstd.FRAME_HEADER + b"\x00\x00"
213 226
214 mutable_array = bytearray(len(v)) 227 mutable_array = bytearray(len(v))
215 mutable_array[:] = v 228 mutable_array[:] = v
216 229
217 sources = [ 230 sources = [