changeset 30577:6146d5acee69

parsers: use buffer to store revlog index Previously, the revlog index passed to parse_index2 must be a "string", which means we have to read the whole revlog index into memory. This patch makes the code accept a generic Py_buffer, to be more flexible - it could be a "string", or anything that implements the buffer interface, like a mmap-ed region. Note: ideally we want to remove the "data" field. However, it is still used in parse_index2: if (idx->inlined) { cache = Py_BuildValue("iO", 0, idx->data); .... } .... tuple = Py_BuildValue("NN", idx, cache); .... return tuple; Its only users are revlogio.parseindex and revlog.__init__: # revlogio.parseindex index, cache = parsers.parse_index2(data, inline) return index, getattr(index, 'nodemap', None), cache # revlog.__init__ d = self._io.parseindex(indexdata, self._inline) self.index, nodemap, self._chunkcache = d Maybe we could move the logic (testing inline and returnning "data" object) to revlog.py. But that should be a separate patch.
author Jun Wu <quark@fb.com>
date Tue, 06 Dec 2016 11:44:49 +0000
parents 541949a10a68
children c6ce11f2ee50
files mercurial/parsers.c
diffstat 1 files changed, 18 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/parsers.c	Tue Dec 06 06:27:58 2016 +0530
+++ b/mercurial/parsers.c	Tue Dec 06 11:44:49 2016 +0000
@@ -753,6 +753,7 @@
 	PyObject_HEAD
 	/* Type-specific fields go here. */
 	PyObject *data;        /* raw bytes of index */
+	Py_buffer buf;         /* buffer of data */
 	PyObject **cache;      /* cached tuples */
 	const char **offsets;  /* populated on demand */
 	Py_ssize_t raw_length; /* original number of elements */
@@ -808,7 +809,7 @@
 		return self->offsets[pos];
 	}
 
-	return PyBytes_AS_STRING(self->data) + pos * v1_hdrsize;
+	return (const char *)(self->buf.buf) + pos * v1_hdrsize;
 }
 
 static inline int index_get_parents(indexObject *self, Py_ssize_t rev,
@@ -2389,9 +2390,9 @@
  */
 static Py_ssize_t inline_scan(indexObject *self, const char **offsets)
 {
-	const char *data = PyBytes_AS_STRING(self->data);
+	const char *data = (const char *)self->buf.buf;
 	Py_ssize_t pos = 0;
-	Py_ssize_t end = PyBytes_GET_SIZE(self->data);
+	Py_ssize_t end = self->buf.len;
 	long incr = v1_hdrsize;
 	Py_ssize_t len = 0;
 
@@ -2425,6 +2426,7 @@
 	self->added = NULL;
 	self->cache = NULL;
 	self->data = NULL;
+	memset(&self->buf, 0, sizeof(self->buf));
 	self->headrevs = NULL;
 	self->filteredrevs = Py_None;
 	Py_INCREF(Py_None);
@@ -2433,11 +2435,15 @@
 
 	if (!PyArg_ParseTuple(args, "OO", &data_obj, &inlined_obj))
 		return -1;
-	if (!PyBytes_Check(data_obj)) {
-		PyErr_SetString(PyExc_TypeError, "data is not a string");
+	if (!PyObject_CheckBuffer(data_obj)) {
+		PyErr_SetString(PyExc_TypeError,
+				"data does not support buffer interface");
 		return -1;
 	}
-	size = PyBytes_GET_SIZE(data_obj);
+
+	if (PyObject_GetBuffer(data_obj, &self->buf, PyBUF_SIMPLE) == -1)
+		return -1;
+	size = self->buf.len;
 
 	self->inlined = inlined_obj && PyObject_IsTrue(inlined_obj);
 	self->data = data_obj;
@@ -2478,6 +2484,10 @@
 {
 	_index_clearcaches(self);
 	Py_XDECREF(self->filteredrevs);
+	if (self->buf.buf) {
+		PyBuffer_Release(&self->buf);
+		memset(&self->buf, 0, sizeof(self->buf));
+	}
 	Py_XDECREF(self->data);
 	Py_XDECREF(self->added);
 	PyObject_Del(self);
@@ -2577,7 +2587,8 @@
  * follows:
  *
  * index: an index object that lazily parses RevlogNG records
- * cache: if data is inlined, a tuple (index_file_content, 0), else None
+ * cache: if data is inlined, a tuple (0, index_file_content), else None
+ *        index_file_content could be a string, or a buffer
  *
  * added complications are for backwards compatibility
  */