revlog: don't cache parsed tuples in the C module
A cached entry creates ~8 Python objects per cached changeset, which
comes to around 200 bytes per cached changeset on AMD64. Especially for
operations that touch a lot of changesets, that can easily add up to
more than 100MB of memory. Simple tests on large repositories show a
<2% runtime penalty for ripping out the cache, even for cache-heavy
operations like "hg log" for all revisions.
Differential Revision: https://phab.mercurial-scm.org/D9155
--- a/mercurial/cext/revlog.c Fri Oct 16 16:00:32 2020 -0700
+++ b/mercurial/cext/revlog.c Tue Oct 06 13:34:51 2020 +0200
@@ -81,7 +81,6 @@
/* Type-specific fields go here. */
PyObject *data; /* raw bytes of index */
Py_buffer buf; /* buffer of data */
- PyObject **cache; /* cached tuples */
const char **offsets; /* populated on demand */
Py_ssize_t raw_length; /* original number of elements */
Py_ssize_t length; /* current number of elements */
@@ -327,7 +326,6 @@
const char *c_node_id;
const char *data;
Py_ssize_t length = index_length(self);
- PyObject *entry;
if (pos == nullrev) {
Py_INCREF(nullentry);
@@ -346,17 +344,6 @@
return obj;
}
- if (self->cache) {
- if (self->cache[pos]) {
- Py_INCREF(self->cache[pos]);
- return self->cache[pos];
- }
- } else {
- self->cache = calloc(self->raw_length, sizeof(PyObject *));
- if (self->cache == NULL)
- return PyErr_NoMemory();
- }
-
data = index_deref(self, pos);
if (data == NULL)
return NULL;
@@ -377,18 +364,9 @@
parent_2 = getbe32(data + 28);
c_node_id = data + 32;
- entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
- base_rev, link_rev, parent_1, parent_2, c_node_id,
- (Py_ssize_t)20);
-
- if (entry) {
- PyObject_GC_UnTrack(entry);
- Py_INCREF(entry);
- }
-
- self->cache[pos] = entry;
-
- return entry;
+ return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
+ base_rev, link_rev, parent_1, parent_2, c_node_id,
+ (Py_ssize_t)20);
}
/*
@@ -2578,14 +2556,8 @@
}
self->length = start;
- if (start < self->raw_length) {
- if (self->cache) {
- Py_ssize_t i;
- for (i = start; i < self->raw_length; i++)
- Py_CLEAR(self->cache[i]);
- }
+ if (start < self->raw_length)
self->raw_length = start;
- }
goto done;
}
@@ -2677,7 +2649,6 @@
*/
self->raw_length = 0;
self->added = NULL;
- self->cache = NULL;
self->data = NULL;
memset(&self->buf, 0, sizeof(self->buf));
self->headrevs = NULL;
@@ -2733,14 +2704,6 @@
static void _index_clearcaches(indexObject *self)
{
- if (self->cache) {
- Py_ssize_t i;
-
- for (i = 0; i < self->raw_length; i++)
- Py_CLEAR(self->cache[i]);
- free(self->cache);
- self->cache = NULL;
- }
if (self->offsets) {
PyMem_Free((void *)self->offsets);
self->offsets = NULL;