Mercurial > hg
view contrib/python-zstandard/c-ext/compressionparams.c @ 47072:4c041c71ec01
revlog: introduce an explicit tracking of what the revlog is about
Since the dawn of time, people have been forced to rely on lossy introspection
of the index filename to determine what the purpose and role of the revlog they
encounter is. This is hacky, error prone, inflexible, abstraction-leaky,
<insert-your-own-complaints-here>.
In f63299ee7e4d Raphaël introduced a new attribute to track this information:
`revlog_kind`. However it is initialized in an odd place and various instances
end up not having it set. In addition, it only tracks some of the information
we end up having to introspect in various pieces of code.
So we add a new attribute that holds more data and is more strictly enforced.
This work is done in collaboration with Raphaël.
The `revlog_kind` one will be removed/adapted in the next changeset. We expect
to be able to clean up various existing pieces of code and to simplify coming
work around the newer revlog format.
Differential Revision: https://phab.mercurial-scm.org/D10352
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Tue, 06 Apr 2021 05:20:24 +0200 |
parents | e92ca942ddca |
children |
line wrap: on
line source
/**
 * Copyright (c) 2016-present, Gregory Szorc
 * All rights reserved.
 *
 * This software may be modified and distributed under the terms
 * of the BSD license. See the LICENSE file for details.
 */

#include "python-zstandard.h"

extern PyObject* ZstdError;

/**
 * Set a single parameter on a ZSTD_CCtx_params instance.
 *
 * Returns 0 on success. On failure a ZstdError exception is set and 1 is
 * returned.
 */
int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
    size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value);
    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
            ZSTD_getErrorName(zresult));
        return 1;
    }

    return 0;
}

/* Set a parameter via set_parameter(); on failure return -1 from the
 * enclosing function (the exception has already been set). */
#define TRY_SET_PARAMETER(params, param, value) \
    do { \
        if (set_parameter(params, param, value)) { \
            return -1; \
        } \
    } while (0)

/* Copy one parameter from `source` to `dest`. On failure a ZstdError is set
 * and the enclosing function returns 1, so callers that propagate the
 * failure to Python never return an error without an exception set. */
#define TRY_COPY_PARAMETER(source, dest, param) \
    do { \
        int result; \
        size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
        if (ZSTD_isError(zresult)) { \
            PyErr_Format(ZstdError, "unable to retrieve parameter: %s", \
                ZSTD_getErrorName(zresult)); \
            return 1; \
        } \
        zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
        if (ZSTD_isError(zresult)) { \
            PyErr_Format(ZstdError, "unable to set compression context parameter: %s", \
                ZSTD_getErrorName(zresult)); \
            return 1; \
        } \
    } while (0)

/**
 * Copy every parameter tracked by this module from obj->params into params.
 *
 * Returns 0 on success. On failure a ZstdError exception is set and 1 is
 * returned.
 */
int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);

    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
    TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);

    return 0;
}

/**
 * Reset the ZSTD_CCtx_params held by a parameters object, allocating it
 * first if the object does not have one yet.
 *
 * Returns 0 on success. On failure an exception is set (MemoryError if the
 * allocation fails) and a non-zero value is returned.
 */
int reset_params(ZstdCompressionParametersObject* params) {
    if (params->params) {
        ZSTD_CCtxParams_reset(params->params);
    }
    else {
        params->params = ZSTD_createCCtxParams();
        if (!params->params) {
            PyErr_NoMemory();
            return 1;
        }
    }

    return set_parameters(params->params, params);
}

/* Read a parameter into *value; on failure set ZstdError and return 1 from
 * the enclosing function. */
#define TRY_GET_PARAMETER(params, param, value) \
    do { \
        size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
        if (ZSTD_isError(zresult)) { \
            PyErr_Format(ZstdError, "unable to retrieve parameter: %s", \
                ZSTD_getErrorName(zresult)); \
            return 1; \
        } \
    } while (0)

/**
 * Populate a ZSTD_compressionParameters struct from the values stored in a
 * parameters object.
 *
 * Returns 0 on success. On failure a ZstdError exception is set and 1 is
 * returned.
 */
int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
    int value;

    TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
    cparams->windowLog = value;

    TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
    cparams->chainLog = value;

    TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
    cparams->hashLog = value;

    TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
    cparams->searchLog = value;

    TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
    cparams->minMatch = value;

    TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
    cparams->targetLength = value;

    TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
    cparams->strategy = value;

    return 0;
}

/**
 * tp_init for ZstdCompressionParameters.
 *
 * Every argument is an optional integer keyword. Three pairs of keywords are
 * aliases for the same underlying parameter; passing both members of a pair
 * raises ValueError:
 *   - compression_strategy / strategy
 *   - overlap_log / overlap_size_log
 *   - ldm_hash_rate_log / ldm_hash_every_log
 *
 * A negative `threads` value is replaced by cpu_count().
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "format",
        "compression_level",
        "window_log",
        "hash_log",
        "chain_log",
        "search_log",
        "min_match",
        "target_length",
        "compression_strategy",
        "strategy",
        "write_content_size",
        "write_checksum",
        "write_dict_id",
        "job_size",
        "overlap_log",
        "overlap_size_log",
        "force_max_window",
        "enable_ldm",
        "ldm_hash_log",
        "ldm_min_match",
        "ldm_bucket_size_log",
        "ldm_hash_rate_log",
        "ldm_hash_every_log",
        "threads",
        NULL
    };

    int format = 0;
    int compressionLevel = 0;
    int windowLog = 0;
    int hashLog = 0;
    int chainLog = 0;
    int searchLog = 0;
    int minMatch = 0;
    int targetLength = 0;
    /* -1 means "not supplied" for the aliased keywords below. */
    int compressionStrategy = -1;
    int strategy = -1;
    int contentSizeFlag = 1;
    int checksumFlag = 0;
    int dictIDFlag = 0;
    int jobSize = 0;
    int overlapLog = -1;
    int overlapSizeLog = -1;
    int forceMaxWindow = 0;
    int enableLDM = 0;
    int ldmHashLog = 0;
    int ldmMinMatch = 0;
    int ldmBucketSizeLog = 0;
    int ldmHashRateLog = -1;
    int ldmHashEveryLog = -1;
    int threads = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
        "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
        kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
        &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
        &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
        &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog,
        &ldmMinMatch, &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog,
        &threads)) {
        return -1;
    }

    if (reset_params(self)) {
        return -1;
    }

    if (threads < 0) {
        threads = cpu_count();
    }

    /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
     * because setting ZSTD_c_nbWorkers resets the other parameters. */
    TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);

    TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
    TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
    TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
    TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
    TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
    TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
    TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
    TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);

    if (compressionStrategy != -1 && strategy != -1) {
        PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
        return -1;
    }

    if (compressionStrategy != -1) {
        strategy = compressionStrategy;
    }
    else if (strategy == -1) {
        strategy = 0;
    }

    TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
    TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
    TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
    TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
    TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);

    if (overlapLog != -1 && overlapSizeLog != -1) {
        PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
        return -1;
    }

    if (overlapSizeLog != -1) {
        overlapLog = overlapSizeLog;
    }
    else if (overlapLog == -1) {
        overlapLog = 0;
    }

    TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
    TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
    TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);

    if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
        PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
        return -1;
    }

    if (ldmHashEveryLog != -1) {
        ldmHashRateLog = ldmHashEveryLog;
    }
    else if (ldmHashRateLog == -1) {
        ldmHashRateLog = 0;
    }

    TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);

    return 0;
}

PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
"Create a CompressionParameters from a compression level and target sizes\n"
);

/**
 * Store `value` under `key` in `kwargs` unless the key is already present.
 *
 * Returns 0 on success (including when the key already existed) and -1 with
 * an exception set on failure.
 */
static int set_default_kwarg(PyObject* kwargs, const char* key, unsigned long value) {
    PyObject* val;
    int res;

    if (PyDict_GetItemString(kwargs, key)) {
        return 0;
    }

    val = PyLong_FromUnsignedLong(value);
    if (!val) {
        return -1;
    }

    res = PyDict_SetItemString(kwargs, key, val);
    Py_DECREF(val);

    return res;
}

/**
 * Static method: build a ZstdCompressionParameters from a compression level.
 *
 * Takes one positional int (the level). The optional keywords `source_size`
 * and `dict_size` are consumed here and fed to ZSTD_getCParams(); the values
 * it derives are used as defaults for window_log, chain_log, hash_log,
 * search_log, min_match, target_length and compression_strategy when the
 * caller did not supply them. All remaining keywords are forwarded to the
 * type's init function.
 *
 * Returns a new reference, or NULL with an exception set.
 */
ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
    int level;
    PyObject* sourceSize = NULL;
    PyObject* dictSize = NULL;
    unsigned PY_LONG_LONG iSourceSize = 0;
    Py_ssize_t iDictSize = 0;
    PyObject* emptyArgs = NULL;
    ZSTD_compressionParameters params;
    ZstdCompressionParametersObject* result = NULL;

    if (!PyArg_ParseTuple(args, "i:from_level", &level)) {
        return NULL;
    }

    /* Always operate on a private dict: keys are deleted and added below and
     * the caller's mapping must not be modified. */
    kwargs = kwargs ? PyDict_Copy(kwargs) : PyDict_New();
    if (!kwargs) {
        return NULL;
    }

    sourceSize = PyDict_GetItemString(kwargs, "source_size");
    if (sourceSize) {
#if PY_MAJOR_VERSION >= 3
        iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
#else
        iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
#endif
        /* The all-ones value is also a valid result, so only treat it as a
         * failure when an exception is actually pending. */
        if (iSourceSize == (unsigned PY_LONG_LONG)(-1) && PyErr_Occurred()) {
            goto cleanup;
        }

        if (PyDict_DelItemString(kwargs, "source_size") < 0) {
            goto cleanup;
        }
    }

    dictSize = PyDict_GetItemString(kwargs, "dict_size");
    if (dictSize) {
#if PY_MAJOR_VERSION >= 3
        iDictSize = PyLong_AsSsize_t(dictSize);
#else
        iDictSize = PyInt_AsSsize_t(dictSize);
#endif
        /* -1 is ambiguous between "error" and the integer -1. */
        if (iDictSize == -1 && PyErr_Occurred()) {
            goto cleanup;
        }

        if (PyDict_DelItemString(kwargs, "dict_size") < 0) {
            goto cleanup;
        }
    }

    params = ZSTD_getCParams(level, iSourceSize, iDictSize);

    /* Values derived from the input level and sizes are passed along to the
       constructor. But only if a value doesn't already exist. */
    if (set_default_kwarg(kwargs, "window_log", params.windowLog) ||
        set_default_kwarg(kwargs, "chain_log", params.chainLog) ||
        set_default_kwarg(kwargs, "hash_log", params.hashLog) ||
        set_default_kwarg(kwargs, "search_log", params.searchLog) ||
        set_default_kwarg(kwargs, "min_match", params.minMatch) ||
        set_default_kwarg(kwargs, "target_length", params.targetLength) ||
        set_default_kwarg(kwargs, "compression_strategy", params.strategy)) {
        goto cleanup;
    }

    result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
    if (!result) {
        goto cleanup;
    }

    /* PyObject_New() does not zero the object; dealloc and reset_params()
     * both inspect this field. */
    result->params = NULL;

    emptyArgs = PyTuple_New(0);
    if (!emptyArgs) {
        Py_CLEAR(result);
        goto cleanup;
    }

    if (ZstdCompressionParameters_init(result, emptyArgs, kwargs)) {
        Py_CLEAR(result);
    }

    Py_DECREF(emptyArgs);

cleanup:
    Py_DECREF(kwargs);

    return result;
}

PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
"Estimate the size in bytes of a compression context for compression parameters\n"
);

/**
 * Return, as a Python int, the value reported by
 * ZSTD_estimateCCtxSize_usingCCtxParams() for this object's parameters.
 */
PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
    return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
}

PyDoc_STRVAR(ZstdCompressionParameters__doc__,
"ZstdCompressionParameters: low-level control over zstd compression");

/**
 * tp_dealloc: release the owned ZSTD_CCtx_params and free the object.
 *
 * The type sets Py_TPFLAGS_BASETYPE, so the instance may belong to a
 * subclass whose memory came from a different allocator. Free through the
 * instance type's tp_free rather than hard-coding PyObject_Del().
 */
static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
    if (self->params) {
        ZSTD_freeCCtxParams(self->params);
        self->params = NULL;
    }

    Py_TYPE(self)->tp_free((PyObject*)self);
}

/* Define a read-only attribute getter that returns the current value of
 * `param` as a Python int, or raises ZstdError if it cannot be read. */
#define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
    int result; \
    size_t zresult; \
    ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
    zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
    if (ZSTD_isError(zresult)) { \
        PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
            ZSTD_getErrorName(zresult)); \
        return NULL; \
    } \
    return PyLong_FromLong(result); \
}

PARAM_GETTER(format, ZSTD_c_format)
PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
PARAM_GETTER(window_log, ZSTD_c_windowLog)
PARAM_GETTER(hash_log, ZSTD_c_hashLog)
PARAM_GETTER(chain_log, ZSTD_c_chainLog)
PARAM_GETTER(search_log, ZSTD_c_searchLog)
PARAM_GETTER(min_match, ZSTD_c_minMatch)
PARAM_GETTER(target_length, ZSTD_c_targetLength)
PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
PARAM_GETTER(job_size, ZSTD_c_jobSize)
PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
PARAM_GETTER(threads, ZSTD_c_nbWorkers)

static PyMethodDef ZstdCompressionParameters_methods[] = {
    {
        "from_level",
        (PyCFunction)CompressionParameters_from_level,
        METH_VARARGS | METH_KEYWORDS | METH_STATIC,
        ZstdCompressionParameters_from_level__doc__
    },
    {
        "estimated_compression_context_size",
        (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
        METH_NOARGS,
        ZstdCompressionParameters_estimated_compression_context_size__doc__
    },
    { NULL, NULL }
};

/* Read-only attribute entry backed by the getter generated by PARAM_GETTER. */
#define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }

static PyGetSetDef ZstdCompressionParameters_getset[] = {
    GET_SET_ENTRY(format),
    GET_SET_ENTRY(compression_level),
    GET_SET_ENTRY(window_log),
    GET_SET_ENTRY(hash_log),
    GET_SET_ENTRY(chain_log),
    GET_SET_ENTRY(search_log),
    GET_SET_ENTRY(min_match),
    GET_SET_ENTRY(target_length),
    GET_SET_ENTRY(compression_strategy),
    GET_SET_ENTRY(write_content_size),
    GET_SET_ENTRY(write_checksum),
    GET_SET_ENTRY(write_dict_id),
    GET_SET_ENTRY(threads),
    GET_SET_ENTRY(job_size),
    GET_SET_ENTRY(overlap_log),
    /* TODO remove this deprecated attribute */
    { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
    GET_SET_ENTRY(force_max_window),
    GET_SET_ENTRY(enable_ldm),
    GET_SET_ENTRY(ldm_hash_log),
    GET_SET_ENTRY(ldm_min_match),
    GET_SET_ENTRY(ldm_bucket_size_log),
    GET_SET_ENTRY(ldm_hash_rate_log),
    /* TODO remove this deprecated attribute */
    { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
    { NULL }
};

PyTypeObject ZstdCompressionParametersType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "ZstdCompressionParameters", /* tp_name */
    sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
    0,                         /* tp_itemsize */
    (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
    0,                         /* tp_print */
    0,                         /* tp_getattr */
    0,                         /* tp_setattr */
    0,                         /* tp_compare */
    0,                         /* tp_repr */
    0,                         /* tp_as_number */
    0,                         /* tp_as_sequence */
    0,                         /* tp_as_mapping */
    0,                         /* tp_hash */
    0,                         /* tp_call */
    0,                         /* tp_str */
    0,                         /* tp_getattro */
    0,                         /* tp_setattro */
    0,                         /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    ZstdCompressionParameters__doc__, /* tp_doc */
    0,                         /* tp_traverse */
    0,                         /* tp_clear */
    0,                         /* tp_richcompare */
    0,                         /* tp_weaklistoffset */
    0,                         /* tp_iter */
    0,                         /* tp_iternext */
    ZstdCompressionParameters_methods, /* tp_methods */
    0,                         /* tp_members */
    ZstdCompressionParameters_getset, /* tp_getset */
    0,                         /* tp_base */
    0,                         /* tp_dict */
    0,                         /* tp_descr_get */
    0,                         /* tp_descr_set */
    0,                         /* tp_dictoffset */
    (initproc)ZstdCompressionParameters_init, /* tp_init */
    0,                         /* tp_alloc */
    PyType_GenericNew,         /* tp_new */
};

/**
 * Finalize the ZstdCompressionParameters type and register it on `mod`
 * under both its current name and the deprecated CompressionParameters
 * alias.
 *
 * Returns nothing; if a step fails the function returns early and leaves
 * whatever exception the failing call set.
 */
void compressionparams_module_init(PyObject* mod) {
    Py_SET_TYPE(&ZstdCompressionParametersType, &PyType_Type);
    if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
        return;
    }

    /* PyModule_AddObject() only steals the reference on success, so drop it
     * ourselves when registration fails. */
    Py_INCREF(&ZstdCompressionParametersType);
    if (PyModule_AddObject(mod, "ZstdCompressionParameters",
        (PyObject*)&ZstdCompressionParametersType) < 0) {
        Py_DECREF(&ZstdCompressionParametersType);
        return;
    }

    /* TODO remove deprecated alias. */
    Py_INCREF(&ZstdCompressionParametersType);
    if (PyModule_AddObject(mod, "CompressionParameters",
        (PyObject*)&ZstdCompressionParametersType) < 0) {
        Py_DECREF(&ZstdCompressionParametersType);
        return;
    }
}