contrib/python-zstandard/tests/test_train_dictionary.py
author Gregory Szorc <gregory.szorc@gmail.com>
Mon, 09 Apr 2018 10:13:29 -0700
changeset 37495 b1fb341d8a61
parent 31799 e0dc40530c5a
child 40122 73fef626dae3
permissions -rw-r--r--
zstandard: vendor python-zstandard 0.9.0 This was just released. It features a number of goodies. More info at https://gregoryszorc.com/blog/2018/04/09/release-of-python-zstandard-0.9/. The clang-format ignore list was updated to reflect the new source of files. The project contains a vendored copy of zstandard 1.3.4. The old version was 1.1.3. One of the changes between those versions is that zstandard is now dual licensed BSD + GPLv2 and the patent rights grant has been removed. Good riddance. The API should be backwards compatible. So no changes in core should be needed. However, there were a number of changes in the library that we'll want to adapt to. Those will be addressed in subsequent commits. Differential Revision: https://phab.mercurial-scm.org/D3198
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
     1
import struct
30444
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     2
import sys
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
     3
import unittest
30444
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     4
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
     5
import zstandard as zstd
30444
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
     6
30924
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30444
diff changeset
     7
from . common import (
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
     8
    generate_samples,
30924
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30444
diff changeset
     9
    make_cffi,
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30444
diff changeset
    10
)
30444
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    11
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    12
if sys.version_info[0] >= 3:
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    13
    int_type = int
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    14
else:
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    15
    int_type = long
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    16
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    17
30924
c32454d69b85 zstd: vendor python-zstandard 0.7.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30444
diff changeset
    18
@make_cffi
30444
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    19
class TestTrainDictionary(unittest.TestCase):
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    20
    def test_no_args(self):
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    21
        with self.assertRaises(TypeError):
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    22
            zstd.train_dictionary()
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    23
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    24
    def test_bad_args(self):
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    25
        with self.assertRaises(TypeError):
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    26
            zstd.train_dictionary(8192, u'foo')
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    27
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    28
        with self.assertRaises(ValueError):
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    29
            zstd.train_dictionary(8192, [u'foo'])
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    30
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    31
    def test_no_params(self):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    32
        d = zstd.train_dictionary(8192, generate_samples())
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    33
        self.assertIsInstance(d.dict_id(), int_type)
30444
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    34
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    35
        # The dictionary ID may be different across platforms.
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    36
        expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
30444
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    37
b86a448a2965 zstd: vendor python-zstandard 0.5.0
Gregory Szorc <gregory.szorc@gmail.com>
parents:
diff changeset
    38
        data = d.as_bytes()
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    39
        self.assertEqual(data[0:8], expected)
31799
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    40
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    41
    def test_basic(self):
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    42
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
31799
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    43
        self.assertIsInstance(d.dict_id(), int_type)
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    44
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    45
        data = d.as_bytes()
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    46
        self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    47
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    48
        self.assertEqual(d.k, 64)
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    49
        self.assertEqual(d.d, 16)
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    50
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    51
    def test_set_dict_id(self):
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    52
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    53
                                  dict_id=42)
31799
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    54
        self.assertEqual(d.dict_id(), 42)
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    55
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    56
    def test_optimize(self):
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    57
        d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    58
                                  d=16)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    59
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    60
        self.assertEqual(d.k, 50)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    61
        self.assertEqual(d.d, 16)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    62
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    63
@make_cffi
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    64
class TestCompressionDict(unittest.TestCase):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    65
    def test_bad_mode(self):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    66
        with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    67
            zstd.ZstdCompressionDict(b'foo', dict_type=42)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    68
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    69
    def test_bad_precompute_compress(self):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    70
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
31799
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    71
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    72
        with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    73
            d.precompute_compress()
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    74
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    75
        with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    76
            d.precompute_compress(level=3,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    77
                                  compression_params=zstd.CompressionParameters())
31799
e0dc40530c5a zstd: vendor python-zstandard 0.8.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30924
diff changeset
    78
37495
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    79
    def test_precompute_compress_rawcontent(self):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    80
        d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    81
                                     dict_type=zstd.DICT_TYPE_RAWCONTENT)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    82
        d.precompute_compress(level=1)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    83
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    84
        d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    85
                                     dict_type=zstd.DICT_TYPE_FULLDICT)
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    86
        with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
b1fb341d8a61 zstandard: vendor python-zstandard 0.9.0
Gregory Szorc <gregory.szorc@gmail.com>
parents: 31799
diff changeset
    87
            d.precompute_compress(level=1)