Mercurial > hg
comparison contrib/python-zstandard/c-ext/compressor.c @ 30822:b54a2984cdd4
zstd: vendor python-zstandard 0.6.0
Commit 63c68d6f5fc8de4afd9bde81b13b537beb4e47e8 from
https://github.com/indygreg/python-zstandard is imported without
modifications (other than removing unwanted files).
This includes minor performance and feature improvements. It also
changes the vendored zstd library from 1.1.1 to 1.1.2.
# no-check-commit
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Sat, 14 Jan 2017 19:41:43 -0800 |
parents | b86a448a2965 |
children | 08fa3a76a080 |
comparison
equal
deleted
inserted
replaced
30821:7005c03f7387 | 30822:b54a2984cdd4 |
---|---|
7 */ | 7 */ |
8 | 8 |
9 #include "python-zstandard.h" | 9 #include "python-zstandard.h" |
10 | 10 |
11 extern PyObject* ZstdError; | 11 extern PyObject* ZstdError; |
12 | |
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) { | |
14 ZSTD_customMem zmem; | |
15 assert(!compressor->cdict); | |
16 Py_BEGIN_ALLOW_THREADS | |
17 memset(&zmem, 0, sizeof(zmem)); | |
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData, | |
19 compressor->dict->dictSize, *zparams, zmem); | |
20 Py_END_ALLOW_THREADS | |
21 | |
22 if (!compressor->cdict) { | |
23 PyErr_SetString(ZstdError, "could not create compression dictionary"); | |
24 return 1; | |
25 } | |
26 | |
27 return 0; | |
28 } | |
12 | 29 |
13 /** | 30 /** |
14 * Initialize a zstd CStream from a ZstdCompressor instance. | 31 * Initialize a zstd CStream from a ZstdCompressor instance. |
15 * | 32 * |
16 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python | 33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python |
54 return NULL; | 71 return NULL; |
55 } | 72 } |
56 | 73 |
57 return cstream; | 74 return cstream; |
58 } | 75 } |
59 | |
60 | 76 |
61 PyDoc_STRVAR(ZstdCompressor__doc__, | 77 PyDoc_STRVAR(ZstdCompressor__doc__, |
62 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" | 78 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" |
63 "\n" | 79 "\n" |
64 "Create an object used to perform Zstandard compression.\n" | 80 "Create an object used to perform Zstandard compression.\n" |
105 CompressionParametersObject* params = NULL; | 121 CompressionParametersObject* params = NULL; |
106 PyObject* writeChecksum = NULL; | 122 PyObject* writeChecksum = NULL; |
107 PyObject* writeContentSize = NULL; | 123 PyObject* writeContentSize = NULL; |
108 PyObject* writeDictID = NULL; | 124 PyObject* writeDictID = NULL; |
109 | 125 |
126 self->cctx = NULL; | |
110 self->dict = NULL; | 127 self->dict = NULL; |
111 self->cparams = NULL; | 128 self->cparams = NULL; |
112 self->cdict = NULL; | 129 self->cdict = NULL; |
113 | 130 |
114 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist, | 131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist, |
127 PyErr_Format(PyExc_ValueError, "level must be less than %d", | 144 PyErr_Format(PyExc_ValueError, "level must be less than %d", |
128 ZSTD_maxCLevel() + 1); | 145 ZSTD_maxCLevel() + 1); |
129 return -1; | 146 return -1; |
130 } | 147 } |
131 | 148 |
149 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the | |
150 overhead of each compression operation. */ | |
151 self->cctx = ZSTD_createCCtx(); | |
152 if (!self->cctx) { | |
153 PyErr_NoMemory(); | |
154 return -1; | |
155 } | |
156 | |
132 self->compressionLevel = level; | 157 self->compressionLevel = level; |
133 | 158 |
134 if (dict) { | 159 if (dict) { |
135 self->dict = dict; | 160 self->dict = dict; |
136 Py_INCREF(dict); | 161 Py_INCREF(dict); |
161 Py_XDECREF(self->dict); | 186 Py_XDECREF(self->dict); |
162 | 187 |
163 if (self->cdict) { | 188 if (self->cdict) { |
164 ZSTD_freeCDict(self->cdict); | 189 ZSTD_freeCDict(self->cdict); |
165 self->cdict = NULL; | 190 self->cdict = NULL; |
191 } | |
192 | |
193 if (self->cctx) { | |
194 ZSTD_freeCCtx(self->cctx); | |
195 self->cctx = NULL; | |
166 } | 196 } |
167 | 197 |
168 PyObject_Del(self); | 198 PyObject_Del(self); |
169 } | 199 } |
170 | 200 |
337 | 367 |
338 return res; | 368 return res; |
339 } | 369 } |
340 | 370 |
341 PyDoc_STRVAR(ZstdCompressor_compress__doc__, | 371 PyDoc_STRVAR(ZstdCompressor_compress__doc__, |
342 "compress(data)\n" | 372 "compress(data, allow_empty=False)\n" |
343 "\n" | 373 "\n" |
344 "Compress data in a single operation.\n" | 374 "Compress data in a single operation.\n" |
345 "\n" | 375 "\n" |
346 "This is the simplest mechanism to perform compression: simply pass in a\n" | 376 "This is the simplest mechanism to perform compression: simply pass in a\n" |
347 "value and get a compressed value back. It is almost the most prone to abuse.\n" | 377 "value and get a compressed value back. It is almost the most prone to abuse.\n" |
348 "The input and output values must fit in memory, so passing in very large\n" | 378 "The input and output values must fit in memory, so passing in very large\n" |
349 "values can result in excessive memory usage. For this reason, one of the\n" | 379 "values can result in excessive memory usage. For this reason, one of the\n" |
350 "streaming based APIs is preferred for larger values.\n" | 380 "streaming based APIs is preferred for larger values.\n" |
351 ); | 381 ); |
352 | 382 |
353 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args) { | 383 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
384 static char* kwlist[] = { | |
385 "data", | |
386 "allow_empty", | |
387 NULL | |
388 }; | |
389 | |
354 const char* source; | 390 const char* source; |
355 Py_ssize_t sourceSize; | 391 Py_ssize_t sourceSize; |
392 PyObject* allowEmpty = NULL; | |
356 size_t destSize; | 393 size_t destSize; |
357 ZSTD_CCtx* cctx; | |
358 PyObject* output; | 394 PyObject* output; |
359 char* dest; | 395 char* dest; |
360 void* dictData = NULL; | 396 void* dictData = NULL; |
361 size_t dictSize = 0; | 397 size_t dictSize = 0; |
362 size_t zresult; | 398 size_t zresult; |
363 ZSTD_parameters zparams; | 399 ZSTD_parameters zparams; |
364 ZSTD_customMem zmem; | |
365 | 400 |
366 #if PY_MAJOR_VERSION >= 3 | 401 #if PY_MAJOR_VERSION >= 3 |
367 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) { | 402 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O", |
368 #else | 403 #else |
369 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) { | 404 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O", |
370 #endif | 405 #endif |
406 kwlist, &source, &sourceSize, &allowEmpty)) { | |
407 return NULL; | |
408 } | |
409 | |
410 /* Limitation in zstd C API doesn't let decompression side distinguish | |
411 between content size of 0 and unknown content size. This can make round | |
412 tripping via Python difficult. Until this is fixed, require a flag | |
413 to fire the footgun. | |
414 https://github.com/indygreg/python-zstandard/issues/11 */ | |
415 if (0 == sourceSize && self->fparams.contentSizeFlag | |
416 && (!allowEmpty || PyObject_Not(allowEmpty))) { | |
417 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes"); | |
371 return NULL; | 418 return NULL; |
372 } | 419 } |
373 | 420 |
374 destSize = ZSTD_compressBound(sourceSize); | 421 destSize = ZSTD_compressBound(sourceSize); |
375 output = PyBytes_FromStringAndSize(NULL, destSize); | 422 output = PyBytes_FromStringAndSize(NULL, destSize); |
376 if (!output) { | 423 if (!output) { |
377 return NULL; | 424 return NULL; |
378 } | 425 } |
379 | 426 |
380 dest = PyBytes_AsString(output); | 427 dest = PyBytes_AsString(output); |
381 | |
382 cctx = ZSTD_createCCtx(); | |
383 if (!cctx) { | |
384 Py_DECREF(output); | |
385 PyErr_SetString(ZstdError, "could not create CCtx"); | |
386 return NULL; | |
387 } | |
388 | 428 |
389 if (self->dict) { | 429 if (self->dict) { |
390 dictData = self->dict->dictData; | 430 dictData = self->dict->dictData; |
391 dictSize = self->dict->dictSize; | 431 dictSize = self->dict->dictSize; |
392 } | 432 } |
404 zparams.fParams = self->fparams; | 444 zparams.fParams = self->fparams; |
405 | 445 |
406 /* The raw dict data has to be processed before it can be used. Since this | 446 /* The raw dict data has to be processed before it can be used. Since this |
407 adds overhead - especially if multiple dictionary compression operations | 447 adds overhead - especially if multiple dictionary compression operations |
408 are performed on the same ZstdCompressor instance - we create a | 448 are performed on the same ZstdCompressor instance - we create a |
409 ZSTD_CDict once and reuse it for all operations. */ | 449 ZSTD_CDict once and reuse it for all operations. |
410 | 450 |
411 /* TODO the zparams (which can be derived from the source data size) used | 451 Note: the compression parameters used for the first invocation (possibly |
412 on first invocation are effectively reused for subsequent operations. This | 452 derived from the source size) will be reused on all subsequent invocations. |
413 may not be appropriate if input sizes vary significantly and could affect | 453 https://github.com/facebook/zstd/issues/358 contains more info. We could |
414 chosen compression parameters. | 454 potentially add an argument somewhere to control this behavior. |
415 https://github.com/facebook/zstd/issues/358 tracks this issue. */ | 455 */ |
416 if (dictData && !self->cdict) { | 456 if (dictData && !self->cdict) { |
417 Py_BEGIN_ALLOW_THREADS | 457 if (populate_cdict(self, dictData, dictSize, &zparams)) { |
418 memset(&zmem, 0, sizeof(zmem)); | |
419 self->cdict = ZSTD_createCDict_advanced(dictData, dictSize, zparams, zmem); | |
420 Py_END_ALLOW_THREADS | |
421 | |
422 if (!self->cdict) { | |
423 Py_DECREF(output); | 458 Py_DECREF(output); |
424 ZSTD_freeCCtx(cctx); | |
425 PyErr_SetString(ZstdError, "could not create compression dictionary"); | |
426 return NULL; | 459 return NULL; |
427 } | 460 } |
428 } | 461 } |
429 | 462 |
430 Py_BEGIN_ALLOW_THREADS | 463 Py_BEGIN_ALLOW_THREADS |
431 /* By avoiding ZSTD_compress(), we don't necessarily write out content | 464 /* By avoiding ZSTD_compress(), we don't necessarily write out content |
432 size. This means the argument to ZstdCompressor to control frame | 465 size. This means the argument to ZstdCompressor to control frame |
433 parameters is honored. */ | 466 parameters is honored. */ |
434 if (self->cdict) { | 467 if (self->cdict) { |
435 zresult = ZSTD_compress_usingCDict(cctx, dest, destSize, | 468 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize, |
436 source, sourceSize, self->cdict); | 469 source, sourceSize, self->cdict); |
437 } | 470 } |
438 else { | 471 else { |
439 zresult = ZSTD_compress_advanced(cctx, dest, destSize, | 472 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize, |
440 source, sourceSize, dictData, dictSize, zparams); | 473 source, sourceSize, dictData, dictSize, zparams); |
441 } | 474 } |
442 Py_END_ALLOW_THREADS | 475 Py_END_ALLOW_THREADS |
443 | |
444 ZSTD_freeCCtx(cctx); | |
445 | 476 |
446 if (ZSTD_isError(zresult)) { | 477 if (ZSTD_isError(zresult)) { |
447 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); | 478 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
448 Py_CLEAR(output); | 479 Py_CLEAR(output); |
449 return NULL; | 480 return NULL; |
498 result->output.pos = 0; | 529 result->output.pos = 0; |
499 | 530 |
500 result->compressor = self; | 531 result->compressor = self; |
501 Py_INCREF(result->compressor); | 532 Py_INCREF(result->compressor); |
502 | 533 |
503 result->flushed = 0; | 534 result->finished = 0; |
504 | 535 |
505 return result; | 536 return result; |
506 } | 537 } |
507 | 538 |
508 PyDoc_STRVAR(ZstdCompressor_read_from__doc__, | 539 PyDoc_STRVAR(ZstdCompressor_read_from__doc__, |
689 | 720 |
690 return result; | 721 return result; |
691 } | 722 } |
692 | 723 |
693 static PyMethodDef ZstdCompressor_methods[] = { | 724 static PyMethodDef ZstdCompressor_methods[] = { |
694 { "compress", (PyCFunction)ZstdCompressor_compress, METH_VARARGS, | 725 { "compress", (PyCFunction)ZstdCompressor_compress, |
695 ZstdCompressor_compress__doc__ }, | 726 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, |
696 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, | 727 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, |
697 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, | 728 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, |
698 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, | 729 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, |
699 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, | 730 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, |
700 { "read_from", (PyCFunction)ZstdCompressor_read_from, | 731 { "read_from", (PyCFunction)ZstdCompressor_read_from, |