289 Py_ssize_t sourceSize; |
285 Py_ssize_t sourceSize; |
290 Py_ssize_t maxOutputSize = 0; |
286 Py_ssize_t maxOutputSize = 0; |
291 unsigned long long decompressedSize; |
287 unsigned long long decompressedSize; |
292 size_t destCapacity; |
288 size_t destCapacity; |
293 PyObject* result = NULL; |
289 PyObject* result = NULL; |
294 ZSTD_DCtx* dctx = NULL; |
|
295 void* dictData = NULL; |
290 void* dictData = NULL; |
296 size_t dictSize = 0; |
291 size_t dictSize = 0; |
297 size_t zresult; |
292 size_t zresult; |
298 |
293 |
299 #if PY_MAJOR_VERSION >= 3 |
294 #if PY_MAJOR_VERSION >= 3 |
300 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n", kwlist, |
295 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress", |
301 #else |
296 #else |
302 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n", kwlist, |
297 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress", |
303 #endif |
298 #endif |
304 &source, &sourceSize, &maxOutputSize)) { |
299 kwlist, &source, &sourceSize, &maxOutputSize)) { |
305 return NULL; |
300 return NULL; |
306 } |
301 } |
307 |
|
308 dctx = PyMem_Malloc(ZSTD_sizeof_DCtx(self->refdctx)); |
|
309 if (!dctx) { |
|
310 PyErr_NoMemory(); |
|
311 return NULL; |
|
312 } |
|
313 |
|
314 ZSTD_copyDCtx(dctx, self->refdctx); |
|
315 |
302 |
316 if (self->dict) { |
303 if (self->dict) { |
317 dictData = self->dict->dictData; |
304 dictData = self->dict->dictData; |
318 dictSize = self->dict->dictSize; |
305 dictSize = self->dict->dictSize; |
319 } |
306 } |
320 |
307 |
321 if (dictData && !self->ddict) { |
308 if (dictData && !self->ddict) { |
322 Py_BEGIN_ALLOW_THREADS |
309 Py_BEGIN_ALLOW_THREADS |
323 self->ddict = ZSTD_createDDict(dictData, dictSize); |
310 self->ddict = ZSTD_createDDict_byReference(dictData, dictSize); |
324 Py_END_ALLOW_THREADS |
311 Py_END_ALLOW_THREADS |
325 |
312 |
326 if (!self->ddict) { |
313 if (!self->ddict) { |
327 PyErr_SetString(ZstdError, "could not create decompression dict"); |
314 PyErr_SetString(ZstdError, "could not create decompression dict"); |
328 goto except; |
315 return NULL; |
329 } |
316 } |
330 } |
317 } |
331 |
318 |
332 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize); |
319 decompressedSize = ZSTD_getDecompressedSize(source, sourceSize); |
333 /* 0 returned if content size not in the zstd frame header */ |
320 /* 0 returned if content size not in the zstd frame header */ |
334 if (0 == decompressedSize) { |
321 if (0 == decompressedSize) { |
335 if (0 == maxOutputSize) { |
322 if (0 == maxOutputSize) { |
336 PyErr_SetString(ZstdError, "input data invalid or missing content size " |
323 PyErr_SetString(ZstdError, "input data invalid or missing content size " |
337 "in frame header"); |
324 "in frame header"); |
338 goto except; |
325 return NULL; |
339 } |
326 } |
340 else { |
327 else { |
341 result = PyBytes_FromStringAndSize(NULL, maxOutputSize); |
328 result = PyBytes_FromStringAndSize(NULL, maxOutputSize); |
342 destCapacity = maxOutputSize; |
329 destCapacity = maxOutputSize; |
343 } |
330 } |
346 result = PyBytes_FromStringAndSize(NULL, decompressedSize); |
333 result = PyBytes_FromStringAndSize(NULL, decompressedSize); |
347 destCapacity = decompressedSize; |
334 destCapacity = decompressedSize; |
348 } |
335 } |
349 |
336 |
350 if (!result) { |
337 if (!result) { |
351 goto except; |
338 return NULL; |
352 } |
339 } |
353 |
340 |
354 Py_BEGIN_ALLOW_THREADS |
341 Py_BEGIN_ALLOW_THREADS |
355 if (self->ddict) { |
342 if (self->ddict) { |
356 zresult = ZSTD_decompress_usingDDict(dctx, PyBytes_AsString(result), destCapacity, |
343 zresult = ZSTD_decompress_usingDDict(self->dctx, |
|
344 PyBytes_AsString(result), destCapacity, |
357 source, sourceSize, self->ddict); |
345 source, sourceSize, self->ddict); |
358 } |
346 } |
359 else { |
347 else { |
360 zresult = ZSTD_decompressDCtx(dctx, PyBytes_AsString(result), destCapacity, source, sourceSize); |
348 zresult = ZSTD_decompressDCtx(self->dctx, |
|
349 PyBytes_AsString(result), destCapacity, source, sourceSize); |
361 } |
350 } |
362 Py_END_ALLOW_THREADS |
351 Py_END_ALLOW_THREADS |
363 |
352 |
364 if (ZSTD_isError(zresult)) { |
353 if (ZSTD_isError(zresult)) { |
365 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); |
354 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); |
366 goto except; |
355 Py_DecRef(result); |
|
356 return NULL; |
367 } |
357 } |
368 else if (decompressedSize && zresult != decompressedSize) { |
358 else if (decompressedSize && zresult != decompressedSize) { |
369 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", |
359 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", |
370 zresult, decompressedSize); |
360 zresult, decompressedSize); |
371 goto except; |
361 Py_DecRef(result); |
|
362 return NULL; |
372 } |
363 } |
373 else if (zresult < destCapacity) { |
364 else if (zresult < destCapacity) { |
374 if (_PyBytes_Resize(&result, zresult)) { |
365 if (_PyBytes_Resize(&result, zresult)) { |
375 goto except; |
366 Py_DecRef(result); |
376 } |
367 return NULL; |
377 } |
368 } |
378 |
|
379 goto finally; |
|
380 |
|
381 except: |
|
382 Py_DecRef(result); |
|
383 result = NULL; |
|
384 |
|
385 finally: |
|
386 if (dctx) { |
|
387 PyMem_FREE(dctx); |
|
388 } |
369 } |
389 |
370 |
390 return result; |
371 return result; |
391 } |
372 } |
392 |
373 |
599 |
576 |
600 result->outSize = outSize; |
577 result->outSize = outSize; |
601 |
578 |
602 result->entered = 0; |
579 result->entered = 0; |
603 result->dstream = NULL; |
580 result->dstream = NULL; |
|
581 |
|
582 return result; |
|
583 } |
|
584 |
|
585 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__, |
|
586 "Decompress a series of chunks using the content dictionary chaining technique\n" |
|
587 ); |
|
588 |
|
589 static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) { |
|
590 static char* kwlist[] = { |
|
591 "frames", |
|
592 NULL |
|
593 }; |
|
594 |
|
595 PyObject* chunks; |
|
596 Py_ssize_t chunksLen; |
|
597 Py_ssize_t chunkIndex; |
|
598 char parity = 0; |
|
599 PyObject* chunk; |
|
600 char* chunkData; |
|
601 Py_ssize_t chunkSize; |
|
602 ZSTD_DCtx* dctx = NULL; |
|
603 size_t zresult; |
|
604 ZSTD_frameParams frameParams; |
|
605 void* buffer1 = NULL; |
|
606 size_t buffer1Size = 0; |
|
607 size_t buffer1ContentSize = 0; |
|
608 void* buffer2 = NULL; |
|
609 size_t buffer2Size = 0; |
|
610 size_t buffer2ContentSize = 0; |
|
611 void* destBuffer = NULL; |
|
612 PyObject* result = NULL; |
|
613 |
|
614 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", |
|
615 kwlist, &PyList_Type, &chunks)) { |
|
616 return NULL; |
|
617 } |
|
618 |
|
619 chunksLen = PyList_Size(chunks); |
|
620 if (!chunksLen) { |
|
621 PyErr_SetString(PyExc_ValueError, "empty input chain"); |
|
622 return NULL; |
|
623 } |
|
624 |
|
625 /* The first chunk should not be using a dictionary. We handle it specially. */ |
|
626 chunk = PyList_GetItem(chunks, 0); |
|
627 if (!PyBytes_Check(chunk)) { |
|
628 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes"); |
|
629 return NULL; |
|
630 } |
|
631 |
|
632 /* We require that all chunks be zstd frames and that they have content size set. */ |
|
633 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
634 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); |
|
635 if (ZSTD_isError(zresult)) { |
|
636 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); |
|
637 return NULL; |
|
638 } |
|
639 else if (zresult) { |
|
640 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame"); |
|
641 return NULL; |
|
642 } |
|
643 |
|
644 if (0 == frameParams.frameContentSize) { |
|
645 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); |
|
646 return NULL; |
|
647 } |
|
648 |
|
649 dctx = ZSTD_createDCtx(); |
|
650 if (!dctx) { |
|
651 PyErr_NoMemory(); |
|
652 goto finally; |
|
653 } |
|
654 |
|
655 buffer1Size = frameParams.frameContentSize; |
|
656 buffer1 = PyMem_Malloc(buffer1Size); |
|
657 if (!buffer1) { |
|
658 goto finally; |
|
659 } |
|
660 |
|
661 Py_BEGIN_ALLOW_THREADS |
|
662 zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize); |
|
663 Py_END_ALLOW_THREADS |
|
664 if (ZSTD_isError(zresult)) { |
|
665 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); |
|
666 goto finally; |
|
667 } |
|
668 |
|
669 buffer1ContentSize = zresult; |
|
670 |
|
671 /* Special case of a simple chain. */ |
|
672 if (1 == chunksLen) { |
|
673 result = PyBytes_FromStringAndSize(buffer1, buffer1Size); |
|
674 goto finally; |
|
675 } |
|
676 |
|
677 /* This should ideally look at next chunk. But this is slightly simpler. */ |
|
678 buffer2Size = frameParams.frameContentSize; |
|
679 buffer2 = PyMem_Malloc(buffer2Size); |
|
680 if (!buffer2) { |
|
681 goto finally; |
|
682 } |
|
683 |
|
684 /* For each subsequent chunk, use the previous fulltext as a content dictionary. |
|
685 Our strategy is to have 2 buffers. One holds the previous fulltext (to be |
|
686 used as a content dictionary) and the other holds the new fulltext. The |
|
687 buffers grow when needed but never decrease in size. This limits the |
|
688 memory allocator overhead. |
|
689 */ |
|
690 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) { |
|
691 chunk = PyList_GetItem(chunks, chunkIndex); |
|
692 if (!PyBytes_Check(chunk)) { |
|
693 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex); |
|
694 goto finally; |
|
695 } |
|
696 |
|
697 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
698 zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); |
|
699 if (ZSTD_isError(zresult)) { |
|
700 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); |
|
701 goto finally; |
|
702 } |
|
703 else if (zresult) { |
|
704 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex); |
|
705 goto finally; |
|
706 } |
|
707 |
|
708 if (0 == frameParams.frameContentSize) { |
|
709 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); |
|
710 goto finally; |
|
711 } |
|
712 |
|
713 parity = chunkIndex % 2; |
|
714 |
|
715 /* This could definitely be abstracted to reduce code duplication. */ |
|
716 if (parity) { |
|
717 /* Resize destination buffer to hold larger content. */ |
|
718 if (buffer2Size < frameParams.frameContentSize) { |
|
719 buffer2Size = frameParams.frameContentSize; |
|
720 destBuffer = PyMem_Realloc(buffer2, buffer2Size); |
|
721 if (!destBuffer) { |
|
722 goto finally; |
|
723 } |
|
724 buffer2 = destBuffer; |
|
725 } |
|
726 |
|
727 Py_BEGIN_ALLOW_THREADS |
|
728 zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size, |
|
729 chunkData, chunkSize, buffer1, buffer1ContentSize); |
|
730 Py_END_ALLOW_THREADS |
|
731 if (ZSTD_isError(zresult)) { |
|
732 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
733 chunkIndex, ZSTD_getErrorName(zresult)); |
|
734 goto finally; |
|
735 } |
|
736 buffer2ContentSize = zresult; |
|
737 } |
|
738 else { |
|
739 if (buffer1Size < frameParams.frameContentSize) { |
|
740 buffer1Size = frameParams.frameContentSize; |
|
741 destBuffer = PyMem_Realloc(buffer1, buffer1Size); |
|
742 if (!destBuffer) { |
|
743 goto finally; |
|
744 } |
|
745 buffer1 = destBuffer; |
|
746 } |
|
747 |
|
748 Py_BEGIN_ALLOW_THREADS |
|
749 zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size, |
|
750 chunkData, chunkSize, buffer2, buffer2ContentSize); |
|
751 Py_END_ALLOW_THREADS |
|
752 if (ZSTD_isError(zresult)) { |
|
753 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
754 chunkIndex, ZSTD_getErrorName(zresult)); |
|
755 goto finally; |
|
756 } |
|
757 buffer1ContentSize = zresult; |
|
758 } |
|
759 } |
|
760 |
|
761 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1, |
|
762 parity ? buffer2ContentSize : buffer1ContentSize); |
|
763 |
|
764 finally: |
|
765 if (buffer2) { |
|
766 PyMem_Free(buffer2); |
|
767 } |
|
768 if (buffer1) { |
|
769 PyMem_Free(buffer1); |
|
770 } |
|
771 |
|
772 if (dctx) { |
|
773 ZSTD_freeDCtx(dctx); |
|
774 } |
604 |
775 |
605 return result; |
776 return result; |
606 } |
777 } |
607 |
778 |
608 static PyMethodDef Decompressor_methods[] = { |
779 static PyMethodDef Decompressor_methods[] = { |
614 Decompressor_decompressobj__doc__ }, |
785 Decompressor_decompressobj__doc__ }, |
615 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS, |
786 { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS, |
616 Decompressor_read_from__doc__ }, |
787 Decompressor_read_from__doc__ }, |
617 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, |
788 { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, |
618 Decompressor_write_to__doc__ }, |
789 Decompressor_write_to__doc__ }, |
|
790 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, |
|
791 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, |
619 { NULL, NULL } |
792 { NULL, NULL } |
620 }; |
793 }; |
621 |
794 |
622 PyTypeObject ZstdDecompressorType = { |
795 PyTypeObject ZstdDecompressorType = { |
623 PyVarObject_HEAD_INIT(NULL, 0) |
796 PyVarObject_HEAD_INIT(NULL, 0) |