view contrib/python-zstandard/tests/test_train_dictionary.py @ 37495:b1fb341d8a61

zstandard: vendor python-zstandard 0.9.0 This was just released. It features a number of goodies. More info at https://gregoryszorc.com/blog/2018/04/09/release-of-python-zstandard-0.9/. The clang-format ignore list was updated to reflect the new source of files. The project contains a vendored copy of zstandard 1.3.4. The old version was 1.1.3. One of the changes between those versions is that zstandard is now dual licensed BSD + GPLv2 and the patent rights grant has been removed. Good riddance. The API should be backwards compatible. So no changes in core should be needed. However, there were a number of changes in the library that we'll want to adapt to. Those will be addressed in subsequent commits. Differential Revision: https://phab.mercurial-scm.org/D3198
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 09 Apr 2018 10:13:29 -0700
parents e0dc40530c5a
children 73fef626dae3
line wrap: on
line source

import struct
import sys
import unittest

import zstandard as zstd

from . common import (
    generate_samples,
    make_cffi,
)

if sys.version_info[0] >= 3:
    int_type = int
else:
    int_type = long


@make_cffi
class TestTrainDictionary(unittest.TestCase):
    def test_no_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary()

    def test_bad_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary(8192, u'foo')

        with self.assertRaises(ValueError):
            zstd.train_dictionary(8192, [u'foo'])

    def test_no_params(self):
        d = zstd.train_dictionary(8192, generate_samples())
        self.assertIsInstance(d.dict_id(), int_type)

        # The dictionary ID may be different across platforms.
        expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())

        data = d.as_bytes()
        self.assertEqual(data[0:8], expected)

    def test_basic(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
        self.assertIsInstance(d.dict_id(), int_type)

        data = d.as_bytes()
        self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')

        self.assertEqual(d.k, 64)
        self.assertEqual(d.d, 16)

    def test_set_dict_id(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
                                  dict_id=42)
        self.assertEqual(d.dict_id(), 42)

    def test_optimize(self):
        d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
                                  d=16)

        self.assertEqual(d.k, 50)
        self.assertEqual(d.d, 16)

@make_cffi
class TestCompressionDict(unittest.TestCase):
    def test_bad_mode(self):
        with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
            zstd.ZstdCompressionDict(b'foo', dict_type=42)

    def test_bad_precompute_compress(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)

        with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
            d.precompute_compress()

        with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
            d.precompute_compress(level=3,
                                  compression_params=zstd.CompressionParameters())

    def test_precompute_compress_rawcontent(self):
        d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
                                     dict_type=zstd.DICT_TYPE_RAWCONTENT)
        d.precompute_compress(level=1)

        d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
                                     dict_type=zstd.DICT_TYPE_FULLDICT)
        with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
            d.precompute_compress(level=1)