parsers: use buffer to store revlog index
Previously, the revlog index passed to parse_index2 had to be a "string",
which meant we had to read the whole revlog index into memory. This patch
makes the code accept a generic Py_buffer, to be more flexible - it could be
a "string", or anything that implements the buffer interface, like a mmap-ed
region.
Note: ideally we want to remove the "data" field. However, it is still used
in parse_index2:
if (idx->inlined) {
cache = Py_BuildValue("iO", 0, idx->data);
....
}
....
tuple = Py_BuildValue("NN", idx, cache);
....
return tuple;
Its only users are revlogio.parseindex and revlog.__init__:
# revlogio.parseindex
index, cache = parsers.parse_index2(data, inline)
return index, getattr(index, 'nodemap', None), cache
# revlog.__init__
d = self._io.parseindex(indexdata, self._inline)
self.index, nodemap, self._chunkcache = d
Maybe we could move the logic (testing inline and returning the "data" object)
to revlog.py. But that should be a separate patch.
--- a/mercurial/parsers.c Tue Dec 06 06:27:58 2016 +0530
+++ b/mercurial/parsers.c Tue Dec 06 11:44:49 2016 +0000
@@ -753,6 +753,7 @@
PyObject_HEAD
/* Type-specific fields go here. */
PyObject *data; /* raw bytes of index */
+ Py_buffer buf; /* buffer of data */
PyObject **cache; /* cached tuples */
const char **offsets; /* populated on demand */
Py_ssize_t raw_length; /* original number of elements */
@@ -808,7 +809,7 @@
return self->offsets[pos];
}
- return PyBytes_AS_STRING(self->data) + pos * v1_hdrsize;
+ return (const char *)(self->buf.buf) + pos * v1_hdrsize;
}
static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
@@ -2389,9 +2390,9 @@
*/
static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
{
- const char *data = PyBytes_AS_STRING(self->data);
+ const char *data = (const char *)self->buf.buf;
Py_ssize_t pos = 0;
- Py_ssize_t end = PyBytes_GET_SIZE(self->data);
+ Py_ssize_t end = self->buf.len;
long incr = v1_hdrsize;
Py_ssize_t len = 0;
@@ -2425,6 +2426,7 @@
self->added = NULL;
self->cache = NULL;
self->data = NULL;
+ memset(&self->buf, 0, sizeof(self->buf));
self->headrevs = NULL;
self->filteredrevs = Py_None;
Py_INCREF(Py_None);
@@ -2433,11 +2435,15 @@
if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
return -1;
- if (!PyBytes_Check(data_obj)) {
- PyErr_SetString(PyExc_TypeError, "data is not a string");
+ if (!PyObject_CheckBuffer(data_obj)) {
+ PyErr_SetString(PyExc_TypeError,
+ "data does not support buffer interface");
return -1;
}
- size = PyBytes_GET_SIZE(data_obj);
+
+ if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
+ return -1;
+ size = self->buf.len;
self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
self->data = data_obj;
@@ -2478,6 +2484,10 @@
{
_index_clearcaches(self);
Py_XDECREF(self->filteredrevs);
+ if (self->buf.buf) {
+ PyBuffer_Release(&self->buf);
+ memset(&self->buf, 0, sizeof(self->buf));
+ }
Py_XDECREF(self->data);
Py_XDECREF(self->added);
PyObject_Del(self);
@@ -2577,7 +2587,8 @@
* follows:
*
* index: an index object that lazily parses RevlogNG records
- * cache: if data is inlined, a tuple (index_file_content, 0), else None
+ * cache: if data is inlined, a tuple (0, index_file_content), else None
+ * index_file_content could be a string, or a buffer
*
* added complications are for backwards compatibility
*/