comparison contrib/python-zstandard/c-ext/compressor.c @ 30822:b54a2984cdd4

zstd: vendor python-zstandard 0.6.0. Commit 63c68d6f5fc8de4afd9bde81b13b537beb4e47e8 from https://github.com/indygreg/python-zstandard is imported without modifications (other than removing unwanted files). This includes minor performance and feature improvements. It also changes the vendored zstd library from 1.1.1 to 1.1.2. # no-check-commit
author Gregory Szorc <gregory.szorc@gmail.com>
date Sat, 14 Jan 2017 19:41:43 -0800
parents b86a448a2965
children 08fa3a76a080
comparison
equal deleted inserted replaced
30821:7005c03f7387 30822:b54a2984cdd4
7 */ 7 */
8 8
9 #include "python-zstandard.h" 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError; 11 extern PyObject* ZstdError;
12
13 int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
14 ZSTD_customMem zmem;
15 assert(!compressor->cdict);
16 Py_BEGIN_ALLOW_THREADS
17 memset(&zmem, 0, sizeof(zmem));
18 compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
19 compressor->dict->dictSize, *zparams, zmem);
20 Py_END_ALLOW_THREADS
21
22 if (!compressor->cdict) {
23 PyErr_SetString(ZstdError, "could not create compression dictionary");
24 return 1;
25 }
26
27 return 0;
28 }
12 29
13 /** 30 /**
14 * Initialize a zstd CStream from a ZstdCompressor instance. 31 * Initialize a zstd CStream from a ZstdCompressor instance.
15 * 32 *
16 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python 33 * Returns a ZSTD_CStream on success or NULL on failure. If NULL, a Python
54 return NULL; 71 return NULL;
55 } 72 }
56 73
57 return cstream; 74 return cstream;
58 } 75 }
59
60 76
61 PyDoc_STRVAR(ZstdCompressor__doc__, 77 PyDoc_STRVAR(ZstdCompressor__doc__,
62 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" 78 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
63 "\n" 79 "\n"
64 "Create an object used to perform Zstandard compression.\n" 80 "Create an object used to perform Zstandard compression.\n"
105 CompressionParametersObject* params = NULL; 121 CompressionParametersObject* params = NULL;
106 PyObject* writeChecksum = NULL; 122 PyObject* writeChecksum = NULL;
107 PyObject* writeContentSize = NULL; 123 PyObject* writeContentSize = NULL;
108 PyObject* writeDictID = NULL; 124 PyObject* writeDictID = NULL;
109 125
126 self->cctx = NULL;
110 self->dict = NULL; 127 self->dict = NULL;
111 self->cparams = NULL; 128 self->cparams = NULL;
112 self->cdict = NULL; 129 self->cdict = NULL;
113 130
114 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist, 131 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO", kwlist,
127 PyErr_Format(PyExc_ValueError, "level must be less than %d", 144 PyErr_Format(PyExc_ValueError, "level must be less than %d",
128 ZSTD_maxCLevel() + 1); 145 ZSTD_maxCLevel() + 1);
129 return -1; 146 return -1;
130 } 147 }
131 148
149 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
150 overhead of each compression operation. */
151 self->cctx = ZSTD_createCCtx();
152 if (!self->cctx) {
153 PyErr_NoMemory();
154 return -1;
155 }
156
132 self->compressionLevel = level; 157 self->compressionLevel = level;
133 158
134 if (dict) { 159 if (dict) {
135 self->dict = dict; 160 self->dict = dict;
136 Py_INCREF(dict); 161 Py_INCREF(dict);
161 Py_XDECREF(self->dict); 186 Py_XDECREF(self->dict);
162 187
163 if (self->cdict) { 188 if (self->cdict) {
164 ZSTD_freeCDict(self->cdict); 189 ZSTD_freeCDict(self->cdict);
165 self->cdict = NULL; 190 self->cdict = NULL;
191 }
192
193 if (self->cctx) {
194 ZSTD_freeCCtx(self->cctx);
195 self->cctx = NULL;
166 } 196 }
167 197
168 PyObject_Del(self); 198 PyObject_Del(self);
169 } 199 }
170 200
337 367
338 return res; 368 return res;
339 } 369 }
340 370
341 PyDoc_STRVAR(ZstdCompressor_compress__doc__, 371 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
342 "compress(data)\n" 372 "compress(data, allow_empty=False)\n"
343 "\n" 373 "\n"
344 "Compress data in a single operation.\n" 374 "Compress data in a single operation.\n"
345 "\n" 375 "\n"
346 "This is the simplest mechanism to perform compression: simply pass in a\n" 376 "This is the simplest mechanism to perform compression: simply pass in a\n"
347 "value and get a compressed value back. It is almost the most prone to abuse.\n" 377 "value and get a compressed value back. It is almost the most prone to abuse.\n"
348 "The input and output values must fit in memory, so passing in very large\n" 378 "The input and output values must fit in memory, so passing in very large\n"
349 "values can result in excessive memory usage. For this reason, one of the\n" 379 "values can result in excessive memory usage. For this reason, one of the\n"
350 "streaming based APIs is preferred for larger values.\n" 380 "streaming based APIs is preferred for larger values.\n"
351 ); 381 );
352 382
353 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args) { 383 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
384 static char* kwlist[] = {
385 "data",
386 "allow_empty",
387 NULL
388 };
389
354 const char* source; 390 const char* source;
355 Py_ssize_t sourceSize; 391 Py_ssize_t sourceSize;
392 PyObject* allowEmpty = NULL;
356 size_t destSize; 393 size_t destSize;
357 ZSTD_CCtx* cctx;
358 PyObject* output; 394 PyObject* output;
359 char* dest; 395 char* dest;
360 void* dictData = NULL; 396 void* dictData = NULL;
361 size_t dictSize = 0; 397 size_t dictSize = 0;
362 size_t zresult; 398 size_t zresult;
363 ZSTD_parameters zparams; 399 ZSTD_parameters zparams;
364 ZSTD_customMem zmem;
365 400
366 #if PY_MAJOR_VERSION >= 3 401 #if PY_MAJOR_VERSION >= 3
367 if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) { 402 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
368 #else 403 #else
369 if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) { 404 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
370 #endif 405 #endif
406 kwlist, &source, &sourceSize, &allowEmpty)) {
407 return NULL;
408 }
409
410 /* Limitation in zstd C API doesn't let decompression side distinguish
411 between content size of 0 and unknown content size. This can make round
412 tripping via Python difficult. Until this is fixed, require a flag
413 to fire the footgun.
414 https://github.com/indygreg/python-zstandard/issues/11 */
415 if (0 == sourceSize && self->fparams.contentSizeFlag
416 && (!allowEmpty || PyObject_Not(allowEmpty))) {
417 PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
371 return NULL; 418 return NULL;
372 } 419 }
373 420
374 destSize = ZSTD_compressBound(sourceSize); 421 destSize = ZSTD_compressBound(sourceSize);
375 output = PyBytes_FromStringAndSize(NULL, destSize); 422 output = PyBytes_FromStringAndSize(NULL, destSize);
376 if (!output) { 423 if (!output) {
377 return NULL; 424 return NULL;
378 } 425 }
379 426
380 dest = PyBytes_AsString(output); 427 dest = PyBytes_AsString(output);
381
382 cctx = ZSTD_createCCtx();
383 if (!cctx) {
384 Py_DECREF(output);
385 PyErr_SetString(ZstdError, "could not create CCtx");
386 return NULL;
387 }
388 428
389 if (self->dict) { 429 if (self->dict) {
390 dictData = self->dict->dictData; 430 dictData = self->dict->dictData;
391 dictSize = self->dict->dictSize; 431 dictSize = self->dict->dictSize;
392 } 432 }
404 zparams.fParams = self->fparams; 444 zparams.fParams = self->fparams;
405 445
406 /* The raw dict data has to be processed before it can be used. Since this 446 /* The raw dict data has to be processed before it can be used. Since this
407 adds overhead - especially if multiple dictionary compression operations 447 adds overhead - especially if multiple dictionary compression operations
408 are performed on the same ZstdCompressor instance - we create a 448 are performed on the same ZstdCompressor instance - we create a
409 ZSTD_CDict once and reuse it for all operations. */ 449 ZSTD_CDict once and reuse it for all operations.
410 450
411 /* TODO the zparams (which can be derived from the source data size) used 451 Note: the compression parameters used for the first invocation (possibly
412 on first invocation are effectively reused for subsequent operations. This 452 derived from the source size) will be reused on all subsequent invocations.
413 may not be appropriate if input sizes vary significantly and could affect 453 https://github.com/facebook/zstd/issues/358 contains more info. We could
414 chosen compression parameters. 454 potentially add an argument somewhere to control this behavior.
415 https://github.com/facebook/zstd/issues/358 tracks this issue. */ 455 */
416 if (dictData && !self->cdict) { 456 if (dictData && !self->cdict) {
417 Py_BEGIN_ALLOW_THREADS 457 if (populate_cdict(self, dictData, dictSize, &zparams)) {
418 memset(&zmem, 0, sizeof(zmem));
419 self->cdict = ZSTD_createCDict_advanced(dictData, dictSize, zparams, zmem);
420 Py_END_ALLOW_THREADS
421
422 if (!self->cdict) {
423 Py_DECREF(output); 458 Py_DECREF(output);
424 ZSTD_freeCCtx(cctx);
425 PyErr_SetString(ZstdError, "could not create compression dictionary");
426 return NULL; 459 return NULL;
427 } 460 }
428 } 461 }
429 462
430 Py_BEGIN_ALLOW_THREADS 463 Py_BEGIN_ALLOW_THREADS
431 /* By avoiding ZSTD_compress(), we don't necessarily write out content 464 /* By avoiding ZSTD_compress(), we don't necessarily write out content
432 size. This means the argument to ZstdCompressor to control frame 465 size. This means the argument to ZstdCompressor to control frame
433 parameters is honored. */ 466 parameters is honored. */
434 if (self->cdict) { 467 if (self->cdict) {
435 zresult = ZSTD_compress_usingCDict(cctx, dest, destSize, 468 zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
436 source, sourceSize, self->cdict); 469 source, sourceSize, self->cdict);
437 } 470 }
438 else { 471 else {
439 zresult = ZSTD_compress_advanced(cctx, dest, destSize, 472 zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
440 source, sourceSize, dictData, dictSize, zparams); 473 source, sourceSize, dictData, dictSize, zparams);
441 } 474 }
442 Py_END_ALLOW_THREADS 475 Py_END_ALLOW_THREADS
443
444 ZSTD_freeCCtx(cctx);
445 476
446 if (ZSTD_isError(zresult)) { 477 if (ZSTD_isError(zresult)) {
447 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); 478 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
448 Py_CLEAR(output); 479 Py_CLEAR(output);
449 return NULL; 480 return NULL;
498 result->output.pos = 0; 529 result->output.pos = 0;
499 530
500 result->compressor = self; 531 result->compressor = self;
501 Py_INCREF(result->compressor); 532 Py_INCREF(result->compressor);
502 533
503 result->flushed = 0; 534 result->finished = 0;
504 535
505 return result; 536 return result;
506 } 537 }
507 538
508 PyDoc_STRVAR(ZstdCompressor_read_from__doc__, 539 PyDoc_STRVAR(ZstdCompressor_read_from__doc__,
689 720
690 return result; 721 return result;
691 } 722 }
692 723
693 static PyMethodDef ZstdCompressor_methods[] = { 724 static PyMethodDef ZstdCompressor_methods[] = {
694 { "compress", (PyCFunction)ZstdCompressor_compress, METH_VARARGS, 725 { "compress", (PyCFunction)ZstdCompressor_compress,
695 ZstdCompressor_compress__doc__ }, 726 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
696 { "compressobj", (PyCFunction)ZstdCompressor_compressobj, 727 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
697 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, 728 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
698 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, 729 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
699 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, 730 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
700 { "read_from", (PyCFunction)ZstdCompressor_read_from, 731 { "read_from", (PyCFunction)ZstdCompressor_read_from,