revlog: don't cache parsed tuples in the C module
author Joerg Sonnenberger <joerg@bec.de>
Tue, 06 Oct 2020 13:34:51 +0200
changeset 45811 4404f129341e
parent 45810 09914d934cf4
child 45812 976b26bdd0d8
revlog: don't cache parsed tuples in the C module

A cached entry creates ~8 Python objects per cached changeset, which comes to around 200 bytes per cached changeset on AMD64. Especially for operations that touch a lot of changesets, that can easily add up to more than 100 MB of memory. Simple tests on large repositories show a <2% runtime penalty for ripping out the cache, even for cache-heavy operations like "hg log" for all revisions.

Differential Revision: https://phab.mercurial-scm.org/D9155
mercurial/cext/revlog.c
--- a/mercurial/cext/revlog.c	Fri Oct 16 16:00:32 2020 -0700
+++ b/mercurial/cext/revlog.c	Tue Oct 06 13:34:51 2020 +0200
@@ -81,7 +81,6 @@
 	    /* Type-specific fields go here. */
 	    PyObject *data;     /* raw bytes of index */
 	Py_buffer buf;          /* buffer of data */
-	PyObject **cache;       /* cached tuples */
 	const char **offsets;   /* populated on demand */
 	Py_ssize_t raw_length;  /* original number of elements */
 	Py_ssize_t length;      /* current number of elements */
@@ -327,7 +326,6 @@
 	const char *c_node_id;
 	const char *data;
 	Py_ssize_t length = index_length(self);
-	PyObject *entry;
 
 	if (pos == nullrev) {
 		Py_INCREF(nullentry);
@@ -346,17 +344,6 @@
 		return obj;
 	}
 
-	if (self->cache) {
-		if (self->cache[pos]) {
-			Py_INCREF(self->cache[pos]);
-			return self->cache[pos];
-		}
-	} else {
-		self->cache = calloc(self->raw_length, sizeof(PyObject *));
-		if (self->cache == NULL)
-			return PyErr_NoMemory();
-	}
-
 	data = index_deref(self, pos);
 	if (data == NULL)
 		return NULL;
@@ -377,18 +364,9 @@
 	parent_2 = getbe32(data + 28);
 	c_node_id = data + 32;
 
-	entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
-	                      base_rev, link_rev, parent_1, parent_2, c_node_id,
-	                      (Py_ssize_t)20);
-
-	if (entry) {
-		PyObject_GC_UnTrack(entry);
-		Py_INCREF(entry);
-	}
-
-	self->cache[pos] = entry;
-
-	return entry;
+	return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
+	                     base_rev, link_rev, parent_1, parent_2, c_node_id,
+	                     (Py_ssize_t)20);
 }
 
 /*
@@ -2578,14 +2556,8 @@
 		}
 
 		self->length = start;
-		if (start < self->raw_length) {
-			if (self->cache) {
-				Py_ssize_t i;
-				for (i = start; i < self->raw_length; i++)
-					Py_CLEAR(self->cache[i]);
-			}
+		if (start < self->raw_length)
 			self->raw_length = start;
-		}
 		goto done;
 	}
 
@@ -2677,7 +2649,6 @@
 	 */
 	self->raw_length = 0;
 	self->added = NULL;
-	self->cache = NULL;
 	self->data = NULL;
 	memset(&self->buf, 0, sizeof(self->buf));
 	self->headrevs = NULL;
@@ -2733,14 +2704,6 @@
 
 static void _index_clearcaches(indexObject *self)
 {
-	if (self->cache) {
-		Py_ssize_t i;
-
-		for (i = 0; i < self->raw_length; i++)
-			Py_CLEAR(self->cache[i]);
-		free(self->cache);
-		self->cache = NULL;
-	}
 	if (self->offsets) {
 		PyMem_Free((void *)self->offsets);
 		self->offsets = NULL;