comparison mercurial/cext/revlog.c @ 45811:4404f129341e

revlog: don't cache parsed tuples in the C module

A cached entry creates ~8 Python objects per cached changeset, which comes to around 200 bytes per cached changeset on AMD64. Especially for operations that touch a lot of changesets, that can easily add up to more than 100MB of memory. Simple tests on large repositories show a <2% runtime penalty for ripping out the cache, even for cache-heavy operations like "hg log" for all revisions.

Differential Revision: https://phab.mercurial-scm.org/D9155
author Joerg Sonnenberger <joerg@bec.de>
date Tue, 06 Oct 2020 13:34:51 +0200
parents a6fde9d789d9
children 0ce15a8c7b8b
comparison
equal deleted inserted replaced
45810:09914d934cf4 45811:4404f129341e
79 struct indexObjectStruct { 79 struct indexObjectStruct {
80 PyObject_HEAD 80 PyObject_HEAD
81 /* Type-specific fields go here. */ 81 /* Type-specific fields go here. */
82 PyObject *data; /* raw bytes of index */ 82 PyObject *data; /* raw bytes of index */
83 Py_buffer buf; /* buffer of data */ 83 Py_buffer buf; /* buffer of data */
84 PyObject **cache; /* cached tuples */
85 const char **offsets; /* populated on demand */ 84 const char **offsets; /* populated on demand */
86 Py_ssize_t raw_length; /* original number of elements */ 85 Py_ssize_t raw_length; /* original number of elements */
87 Py_ssize_t length; /* current number of elements */ 86 Py_ssize_t length; /* current number of elements */
88 PyObject *added; /* populated on demand */ 87 PyObject *added; /* populated on demand */
89 PyObject *headrevs; /* cache, invalidated on changes */ 88 PyObject *headrevs; /* cache, invalidated on changes */
325 uint64_t offset_flags; 324 uint64_t offset_flags;
326 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2; 325 int comp_len, uncomp_len, base_rev, link_rev, parent_1, parent_2;
327 const char *c_node_id; 326 const char *c_node_id;
328 const char *data; 327 const char *data;
329 Py_ssize_t length = index_length(self); 328 Py_ssize_t length = index_length(self);
330 PyObject *entry;
331 329
332 if (pos == nullrev) { 330 if (pos == nullrev) {
333 Py_INCREF(nullentry); 331 Py_INCREF(nullentry);
334 return nullentry; 332 return nullentry;
335 } 333 }
342 if (pos >= self->length) { 340 if (pos >= self->length) {
343 PyObject *obj; 341 PyObject *obj;
344 obj = PyList_GET_ITEM(self->added, pos - self->length); 342 obj = PyList_GET_ITEM(self->added, pos - self->length);
345 Py_INCREF(obj); 343 Py_INCREF(obj);
346 return obj; 344 return obj;
347 }
348
349 if (self->cache) {
350 if (self->cache[pos]) {
351 Py_INCREF(self->cache[pos]);
352 return self->cache[pos];
353 }
354 } else {
355 self->cache = calloc(self->raw_length, sizeof(PyObject *));
356 if (self->cache == NULL)
357 return PyErr_NoMemory();
358 } 345 }
359 346
360 data = index_deref(self, pos); 347 data = index_deref(self, pos);
361 if (data == NULL) 348 if (data == NULL)
362 return NULL; 349 return NULL;
375 link_rev = getbe32(data + 20); 362 link_rev = getbe32(data + 20);
376 parent_1 = getbe32(data + 24); 363 parent_1 = getbe32(data + 24);
377 parent_2 = getbe32(data + 28); 364 parent_2 = getbe32(data + 28);
378 c_node_id = data + 32; 365 c_node_id = data + 32;
379 366
380 entry = Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len, 367 return Py_BuildValue(tuple_format, offset_flags, comp_len, uncomp_len,
381 base_rev, link_rev, parent_1, parent_2, c_node_id, 368 base_rev, link_rev, parent_1, parent_2, c_node_id,
382 (Py_ssize_t)20); 369 (Py_ssize_t)20);
383
384 if (entry) {
385 PyObject_GC_UnTrack(entry);
386 Py_INCREF(entry);
387 }
388
389 self->cache[pos] = entry;
390
391 return entry;
392 } 370 }
393 371
394 /* 372 /*
395 * Return the 20-byte SHA of the node corresponding to the given rev. 373 * Return the 20-byte SHA of the node corresponding to the given rev.
396 */ 374 */
2576 } else if (self->added) { 2554 } else if (self->added) {
2577 Py_CLEAR(self->added); 2555 Py_CLEAR(self->added);
2578 } 2556 }
2579 2557
2580 self->length = start; 2558 self->length = start;
2581 if (start < self->raw_length) { 2559 if (start < self->raw_length)
2582 if (self->cache) {
2583 Py_ssize_t i;
2584 for (i = start; i < self->raw_length; i++)
2585 Py_CLEAR(self->cache[i]);
2586 }
2587 self->raw_length = start; 2560 self->raw_length = start;
2588 }
2589 goto done; 2561 goto done;
2590 } 2562 }
2591 2563
2592 if (self->ntinitialized) { 2564 if (self->ntinitialized) {
2593 index_invalidate_added(self, start - self->length); 2565 index_invalidate_added(self, start - self->length);
2675 2647
2676 /* Initialize before argument-checking to avoid index_dealloc() crash. 2648 /* Initialize before argument-checking to avoid index_dealloc() crash.
2677 */ 2649 */
2678 self->raw_length = 0; 2650 self->raw_length = 0;
2679 self->added = NULL; 2651 self->added = NULL;
2680 self->cache = NULL;
2681 self->data = NULL; 2652 self->data = NULL;
2682 memset(&self->buf, 0, sizeof(self->buf)); 2653 memset(&self->buf, 0, sizeof(self->buf));
2683 self->headrevs = NULL; 2654 self->headrevs = NULL;
2684 self->filteredrevs = Py_None; 2655 self->filteredrevs = Py_None;
2685 Py_INCREF(Py_None); 2656 Py_INCREF(Py_None);
2731 return (PyObject *)self; 2702 return (PyObject *)self;
2732 } 2703 }
2733 2704
2734 static void _index_clearcaches(indexObject *self) 2705 static void _index_clearcaches(indexObject *self)
2735 { 2706 {
2736 if (self->cache) {
2737 Py_ssize_t i;
2738
2739 for (i = 0; i < self->raw_length; i++)
2740 Py_CLEAR(self->cache[i]);
2741 free(self->cache);
2742 self->cache = NULL;
2743 }
2744 if (self->offsets) { 2707 if (self->offsets) {
2745 PyMem_Free((void *)self->offsets); 2708 PyMem_Free((void *)self->offsets);
2746 self->offsets = NULL; 2709 self->offsets = NULL;
2747 } 2710 }
2748 if (self->ntinitialized) { 2711 if (self->ntinitialized) {