contrib/python-zstandard/tests/test_train_dictionary.py
author Gregory Szorc <gregory.szorc@gmail.com>
Sat, 28 Dec 2019 09:55:45 -0800
changeset 43999 de7838053207
parent 42941 69de49c4e39c
child 44232 5e84a96d865b
permissions -rw-r--r--
zstandard: vendor python-zstandard 0.13.0 Version 0.13.0 of the package was just released. It contains an upgraded zstd C library which can result in some performance wins, official support for Python 3.8, and a blackened code base. There were no meaningful code or functionality changes in this release of python-zstandard: just reformatting and an upgraded zstd library version. So the diff seems much larger than what it is. Files were added without modifications. The clang-format-ignorelist file was updated to reflect a new header file in the zstd distribution. # no-check-commit because 3rd party code has different style guidelines Differential Revision: https://phab.mercurial-scm.org/D7770

import struct
import sys
import unittest

import zstandard as zstd

from .common import (
    generate_samples,
    make_cffi,
    random_input_data,
    TestCase,
)

if sys.version_info[0] >= 3:
    int_type = int
else:
    int_type = long


@make_cffi
class TestTrainDictionary(TestCase):
    def test_no_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary()

    def test_bad_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary(8192, u"foo")

        with self.assertRaises(ValueError):
            zstd.train_dictionary(8192, [u"foo"])

    def test_no_params(self):
        d = zstd.train_dictionary(8192, random_input_data())
        self.assertIsInstance(d.dict_id(), int_type)

        # The dictionary ID may be different across platforms.
        expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id())

        data = d.as_bytes()
        self.assertEqual(data[0:8], expected)

    def test_basic(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
        self.assertIsInstance(d.dict_id(), int_type)

        data = d.as_bytes()
        self.assertEqual(data[0:4], b"\x37\xa4\x30\xec")

        self.assertEqual(d.k, 64)
        self.assertEqual(d.d, 16)

    def test_set_dict_id(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, dict_id=42)
        self.assertEqual(d.dict_id(), 42)

    def test_optimize(self):
        d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, d=16)

        # This varies by platform.
        self.assertIn(d.k, (50, 2000))
        self.assertEqual(d.d, 16)


@make_cffi
class TestCompressionDict(TestCase):
    def test_bad_mode(self):
        with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"):
            zstd.ZstdCompressionDict(b"foo", dict_type=42)

    def test_bad_precompute_compress(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)

        with self.assertRaisesRegex(ValueError, "must specify one of level or "):
            d.precompute_compress()

        with self.assertRaisesRegex(ValueError, "must only specify one of level or "):
            d.precompute_compress(
                level=3, compression_params=zstd.CompressionParameters()
            )

    def test_precompute_compress_rawcontent(self):
        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT
        )
        d.precompute_compress(level=1)

        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT
        )
        with self.assertRaisesRegex(zstd.ZstdError, "unable to precompute dictionary"):
            d.precompute_compress(level=1)