contrib/python-zstandard/tests/test_train_dictionary.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Thu, 08 Jul 2021 03:03:34 +0200
changeset 47593 f927ad5a4e2c
parent 44147 5e84a96d865b
permissions -rw-r--r--
dirstate: add a `set_tracked` method for "hg add"-like usage This is a step further toward clarifying the semantic of various dirstate call. Having a dedicated function comes with a couple of benefits: 1) we can move duplicated logic about how to handle the previous state within the dirstate. Since we are sure this is always called in the same situation, we can implement that logic once in the dirstate. 2) having a dedicated method for this case unlock also having a dedicated method for the other case and recording more information at that time. All this leading having more code within the dirstate and higher level API that are less error prone. Differential Revision: https://phab.mercurial-scm.org/D11013

import struct
import sys
import unittest

import zstandard as zstd

from .common import (
    generate_samples,
    make_cffi,
    random_input_data,
    TestCase,
)

if sys.version_info[0] >= 3:
    int_type = int
else:
    int_type = long


@make_cffi
class TestTrainDictionary(TestCase):
    def test_no_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary()

    def test_bad_args(self):
        with self.assertRaises(TypeError):
            zstd.train_dictionary(8192, u"foo")

        with self.assertRaises(ValueError):
            zstd.train_dictionary(8192, [u"foo"])

    def test_no_params(self):
        d = zstd.train_dictionary(8192, random_input_data())
        self.assertIsInstance(d.dict_id(), int_type)

        # The dictionary ID may be different across platforms.
        expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id())

        data = d.as_bytes()
        self.assertEqual(data[0:8], expected)

    def test_basic(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
        self.assertIsInstance(d.dict_id(), int_type)

        data = d.as_bytes()
        self.assertEqual(data[0:4], b"\x37\xa4\x30\xec")

        self.assertEqual(d.k, 64)
        self.assertEqual(d.d, 16)

    def test_set_dict_id(self):
        d = zstd.train_dictionary(
            8192, generate_samples(), k=64, d=16, dict_id=42
        )
        self.assertEqual(d.dict_id(), 42)

    def test_optimize(self):
        d = zstd.train_dictionary(
            8192, generate_samples(), threads=-1, steps=1, d=16
        )

        # This varies by platform.
        self.assertIn(d.k, (50, 2000))
        self.assertEqual(d.d, 16)


@make_cffi
class TestCompressionDict(TestCase):
    def test_bad_mode(self):
        with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"):
            zstd.ZstdCompressionDict(b"foo", dict_type=42)

    def test_bad_precompute_compress(self):
        d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)

        with self.assertRaisesRegex(
            ValueError, "must specify one of level or "
        ):
            d.precompute_compress()

        with self.assertRaisesRegex(
            ValueError, "must only specify one of level or "
        ):
            d.precompute_compress(
                level=3, compression_params=zstd.CompressionParameters()
            )

    def test_precompute_compress_rawcontent(self):
        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT
        )
        d.precompute_compress(level=1)

        d = zstd.ZstdCompressionDict(
            b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT
        )
        with self.assertRaisesRegex(
            zstd.ZstdError, "unable to precompute dictionary"
        ):
            d.precompute_compress(level=1)