Mercurial > hg
changeset 44087:dc9b53482689
sha1dc: use buffer protocol when parsing arguments
Without this, functions won't accept bytearray, memoryview,
or other types that can be exposed as bytes to the C API.
The most resilient way to obtain a bytes-like object from
the C API is using the Py_buffer interface.
This commit converts use of s#/y# to s*/y* and uses
Py_buffer for accessing the underlying bytes array.
I checked how hashlib is implemented in CPython and the
implementation agrees with its use of the Py_buffer
interface as well as using BufferError in cases of bad
buffer types. Sadly, there's no good way to test for
ndim > 1 without writing our own C-backed Python type.
Differential Revision: https://phab.mercurial-scm.org/D7879
author | Gregory Szorc <gregory.szorc@gmail.com> |
---|---|
date | Tue, 14 Jan 2020 18:59:49 -0800 |
parents | ffac09da7a19 |
children | b3ec1ea95ee6 |
files | mercurial/thirdparty/sha1dc/cext.c tests/test-hashutil.py |
diffstat | 2 files changed, 43 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/thirdparty/sha1dc/cext.c Tue Jan 14 20:05:37 2020 -0500 +++ b/mercurial/thirdparty/sha1dc/cext.c Tue Jan 14 18:59:49 2020 -0800 @@ -25,8 +25,8 @@ static int pysha1ctx_init(pysha1ctx *self, PyObject *args) { - const char *data = NULL; - Py_ssize_t len; + Py_buffer data; + data.obj = NULL; SHA1DCInit(&(self->ctx)); /* We don't want "safe" sha1s, wherein sha1dc can give you a @@ -34,11 +34,19 @@ collision. We just want to detect collisions. */ SHA1DCSetSafeHash(&(self->ctx), 0); - if (!PyArg_ParseTuple(args, PY23("|s#", "|y#"), &data, &len)) { + if (!PyArg_ParseTuple(args, PY23("|s*", "|y*"), &data)) { return -1; } - if (data) { - SHA1DCUpdate(&(self->ctx), data, len); + if (data.obj) { + if (!PyBuffer_IsContiguous(&data, 'C') || data.ndim > 1) { + PyErr_SetString(PyExc_BufferError, + "buffer must be contiguous and single dimension"); + PyBuffer_Release(&data); + return -1; + } + + SHA1DCUpdate(&(self->ctx), data.buf, data.len); + PyBuffer_Release(&data); } return 0; } @@ -50,12 +58,18 @@ static PyObject *pysha1ctx_update(pysha1ctx *self, PyObject *args) { - const char *data; - Py_ssize_t len; - if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &data, &len)) { + Py_buffer data; + if (!PyArg_ParseTuple(args, PY23("s*", "y*"), &data)) { return NULL; } - SHA1DCUpdate(&(self->ctx), data, len); + if (!PyBuffer_IsContiguous(&data, 'C') || data.ndim > 1) { + PyErr_SetString(PyExc_BufferError, + "buffer must be contiguous and single dimension"); + PyBuffer_Release(&data); + return NULL; + } + SHA1DCUpdate(&(self->ctx), data.buf, data.len); + PyBuffer_Release(&data); Py_RETURN_NONE; }
--- a/tests/test-hashutil.py Tue Jan 14 20:05:37 2020 -0500 +++ b/tests/test-hashutil.py Tue Jan 14 18:59:49 2020 -0800 @@ -45,6 +45,26 @@ h.digest(), ) + def test_bytes_like_types(self): + h = self.hasher() + h.update(bytearray(b'foo')) + h.update(memoryview(b'baz')) + self.assertEqual( + '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest() + ) + + h = self.hasher(bytearray(b'foo')) + h.update(b'baz') + self.assertEqual( + '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest() + ) + + h = self.hasher(memoryview(b'foo')) + h.update(b'baz') + self.assertEqual( + '21eb6533733a5e4763acacd1d45a60c2e0e404e1', h.hexdigest() + ) + class hashlibtests(unittest.TestCase, hashertestsbase): hasher = hashlib.sha1