Mercurial > hg
changeset 30577:6146d5acee69
parsers: use buffer to store revlog index
Previously, the revlog index passed to parse_index2 had to be a "string",
which meant we had to read the whole revlog index into memory. This patch
makes the code accept a generic Py_buffer, to be more flexible - it could be
a "string", or anything that implements the buffer interface, like a mmap-ed
region.
Note: ideally we want to remove the "data" field. However, it is still used
in parse_index2:
if (idx->inlined) {
cache = Py_BuildValue("iO", 0, idx->data);
....
}
....
tuple = Py_BuildValue("NN", idx, cache);
....
return tuple;
Its only users are revlogio.parseindex and revlog.__init__:
# revlogio.parseindex
index, cache = parsers.parse_index2(data, inline)
return index, getattr(index, 'nodemap', None), cache
# revlog.__init__
d = self._io.parseindex(indexdata, self._inline)
self.index, nodemap, self._chunkcache = d
Maybe we could move the logic (testing inline and returning the "data" object)
to revlog.py. But that should be a separate patch.
author | Jun Wu <quark@fb.com> |
---|---|
date | Tue, 06 Dec 2016 11:44:49 +0000 |
parents | 541949a10a68 |
children | c6ce11f2ee50 |
files | mercurial/parsers.c |
diffstat | 1 files changed, 18 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/parsers.c Tue Dec 06 06:27:58 2016 +0530 +++ b/mercurial/parsers.c Tue Dec 06 11:44:49 2016 +0000 @@ -753,6 +753,7 @@ PyObject_HEAD /* Type-specific fields go here. */ PyObject *data; /* raw bytes of index */ + Py_buffer buf; /* buffer of data */ PyObject **cache; /* cached tuples */ const char **offsets; /* populated on demand */ Py_ssize_t raw_length; /* original number of elements */ @@ -808,7 +809,7 @@ return self->offsets[pos]; } - return PyBytes_AS_STRING(self->data) + pos * v1_hdrsize; + return (const char *)(self->buf.buf) + pos * v1_hdrsize; } static inline int index_get_parents(indexObject *self, Py_ssize_t rev, @@ -2389,9 +2390,9 @@ */ static Py_ssize_t inline_scan(indexObject *self, const char **offsets) { - const char *data = PyBytes_AS_STRING(self->data); + const char *data = (const char *)self->buf.buf; Py_ssize_t pos = 0; - Py_ssize_t end = PyBytes_GET_SIZE(self->data); + Py_ssize_t end = self->buf.len; long incr = v1_hdrsize; Py_ssize_t len = 0; @@ -2425,6 +2426,7 @@ self->added = NULL; self->cache = NULL; self->data = NULL; + memset(&self->buf, 0, sizeof(self->buf)); self->headrevs = NULL; self->filteredrevs = Py_None; Py_INCREF(Py_None); @@ -2433,11 +2435,15 @@ if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj)) return -1; - if (!PyBytes_Check(data_obj)) { - PyErr_SetString(PyExc_TypeError, "data is not a string"); + if (!PyObject_CheckBuffer(data_obj)) { + PyErr_SetString(PyExc_TypeError, + "data does not support buffer interface"); return -1; } - size = PyBytes_GET_SIZE(data_obj); + + if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1) + return -1; + size = self->buf.len; self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj); self->data = data_obj; @@ -2478,6 +2484,10 @@ { _index_clearcaches(self); Py_XDECREF(self->filteredrevs); + if (self->buf.buf) { + PyBuffer_Release(&self->buf); + memset(&self->buf, 0, sizeof(self->buf)); + } Py_XDECREF(self->data); Py_XDECREF(self->added); PyObject_Del(self); @@ 
-2577,7 +2587,8 @@ * follows: * * index: an index object that lazily parses RevlogNG records - * cache: if data is inlined, a tuple (index_file_content, 0), else None + * cache: if data is inlined, a tuple (0, index_file_content), else None + * index_file_content could be a string, or a buffer * * added complications are for backwards compatibility */