# HG changeset patch # User Bryan O'Sullivan # Date 1334264759 25200 # Node ID e8d37b78acfb22ae2c1fb126c200398cc9c71208 # Parent 1a420761fcb72bfd5e5e0c15b8b2775ff997cc5f parsers: use base-16 trie for faster node->rev mapping This greatly speeds up node->rev lookups, with results that are often user-perceptible: for instance, "hg --time log" of the node associated with rev 1000 on a linux-2.6 repo improves from 0.3 seconds to 0.03. I have not found any instances of slowdowns. The new perfnodelookup command in contrib/perf.py demonstrates the speedup more dramatically, since it performs no I/O. For a single lookup, the new code is about 40x faster. These changes also prepare the ground for the possibility of further improving the performance of prefix-based node lookups. diff -r 1a420761fcb7 -r e8d37b78acfb contrib/perf.py --- a/contrib/perf.py Thu Apr 12 20:22:18 2012 -0500 +++ b/contrib/perf.py Thu Apr 12 14:05:59 2012 -0700 @@ -1,7 +1,7 @@ # perf.py - performance test routines '''helper extension to measure performance''' -from mercurial import cmdutil, scmutil, match, commands +from mercurial import cmdutil, scmutil, util, match, commands import time, os, sys def timer(func, title=None): @@ -120,6 +120,27 @@ cl.rev(n) timer(d) +def perfnodelookup(ui, repo, rev): + import mercurial.revlog + mercurial.revlog._prereadsize = 2**24 # disable lazy parser in old hg + n = repo[rev].node() + cl = mercurial.revlog.revlog(repo.sopener, "00changelog.i") + # behave somewhat consistently across internal API changes + if util.safehasattr(cl, 'clearcaches'): + clearcaches = cl.clearcaches + elif util.safehasattr(cl, '_nodecache'): + from mercurial.node import nullid, nullrev + def clearcaches(): + cl._nodecache = {nullid: nullrev} + cl._nodepos = None + else: + def clearcaches(): + pass + def d(): + cl.rev(n) + clearcaches() + timer(d) + def perflog(ui, repo, **opts): ui.pushbuffer() timer(lambda: commands.log(ui, repo, rev=[], date='', user='', diff -r 1a420761fcb7 -r e8d37b78acfb mercurial/parsers.c --- a/mercurial/parsers.c Thu Apr 12 20:22:18 2012 -0500 +++ b/mercurial/parsers.c Thu Apr 12 14:05:59 2012 -0700 @@ -215,10 +215,27 @@ } /* - * A list-like object that decodes the contents of a RevlogNG index - * file on demand. It has limited support for insert and delete at the - * last element before the end. The last entry is always a sentinel - * nullid. + * A base-16 trie for fast node->rev mapping. + * + * Positive value is index of the next node in the trie + * Negative value is a leaf: -(rev + 1) + * Zero is empty + */ +typedef struct { + int children[16]; +} nodetree; + +/* + * This class has two behaviours. + * + * When used in a list-like way (with integer keys), we decode an + * entry in a RevlogNG index file on demand. Our last entry is a + * sentinel, always a nullid. We have limited support for + * integer-keyed insert and delete, only at elements right before the + * sentinel. + * + * With string keys, we lazily perform a reverse mapping from node to + * rev, using a base-16 trie. */ typedef struct { PyObject_HEAD @@ -229,10 +246,18 @@ Py_ssize_t raw_length; /* original number of elements */ Py_ssize_t length; /* current number of elements */ PyObject *added; /* populated on demand */ + nodetree *nt; /* base-16 trie */ + int ntlength; /* # nodes in use */ + int ntcapacity; /* # nodes allocated */ + int ntdepth; /* maximum depth of tree */ + int ntsplits; /* # splits performed */ + int ntrev; /* last rev scanned */ + int ntlookups; /* # lookups */ + int ntmisses; /* # lookups that miss the cache */ int inlined; } indexObject; -static Py_ssize_t index_length(indexObject *self) +static Py_ssize_t index_length(const indexObject *self) { if (self->added == NULL) return self->length; @@ -240,6 +265,7 @@ } static PyObject *nullentry; +static const char nullid[20]; static long inline_scan(indexObject *self, const char **offsets); @@ -249,7 +275,27 @@ static char *tuple_format = "kiiiiiis#"; #endif -/* RevlogNG format (all in big endian, data may be inlined): +/* + * Return a pointer to the beginning of a RevlogNG record. + */ +static const char *index_deref(indexObject *self, Py_ssize_t pos) +{ + if (self->inlined && pos > 0) { + if (self->offsets == NULL) { + self->offsets = malloc(self->raw_length * + sizeof(*self->offsets)); + if (self->offsets == NULL) + return (const char *)PyErr_NoMemory(); + inline_scan(self, self->offsets); + } + return self->offsets[pos]; + } + + return PyString_AS_STRING(self->data) + pos * 64; +} + +/* + * RevlogNG format (all in big endian, data may be inlined): * 6 bytes: offset * 2 bytes: flags * 4 bytes: compressed length @@ -270,7 +316,10 @@ Py_ssize_t length = index_length(self); PyObject *entry; - if (pos >= length) { + if (pos < 0) + pos += length; + + if (pos < 0 || pos >= length) { PyErr_SetString(PyExc_IndexError, "revlog index out of range"); return NULL; } @@ -298,17 +347,9 @@ return PyErr_NoMemory(); } - if (self->inlined && pos > 0) { - if (self->offsets == NULL) { - self->offsets = malloc(self->raw_length * - sizeof(*self->offsets)); - if (self->offsets == NULL) - return PyErr_NoMemory(); - inline_scan(self, self->offsets); - } - data = self->offsets[pos]; - } else - data = PyString_AS_STRING(self->data) + pos * 64; + data = index_deref(self, pos); + if (data == NULL) + return NULL; memcpy(decode, data, 8 * sizeof(uint32_t)); @@ -341,26 +382,60 @@ return entry; } +/* + * Return the 20-byte SHA of the node corresponding to the given rev. + */ +static const char *index_node(indexObject *self, Py_ssize_t pos) +{ + Py_ssize_t length = index_length(self); + const char *data; + + if (pos == length - 1) + return nullid; + + if (pos >= length) + return NULL; + + if (pos >= self->length - 1) { + PyObject *tuple, *str; + tuple = PyList_GET_ITEM(self->added, pos - self->length + 1); + str = PyTuple_GetItem(tuple, 7); + return str ? PyString_AS_STRING(str) : NULL; + } + + data = index_deref(self, pos); + return data ? data + 32 : NULL; +} + +static int nt_insert(indexObject *self, const char *node, int rev); + +static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen) +{ + if (PyString_AsStringAndSize(obj, node, nodelen) == -1) + return -1; + if (*nodelen == 20) + return 0; + PyErr_SetString(PyExc_ValueError, "20-byte hash required"); + return -1; +} + static PyObject *index_insert(indexObject *self, PyObject *args) { - PyObject *obj, *node; + PyObject *obj; + char *node; long offset; - Py_ssize_t len; + Py_ssize_t len, nodelen; if (!PyArg_ParseTuple(args, "lO", &offset, &obj)) return NULL; if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) { - PyErr_SetString(PyExc_ValueError, "8-tuple required"); + PyErr_SetString(PyExc_TypeError, "8-tuple required"); return NULL; } - node = PyTuple_GET_ITEM(obj, 7); - if (!PyString_Check(node) || PyString_GET_SIZE(node) != 20) { - PyErr_SetString(PyExc_ValueError, - "20-byte hash required as last element"); + if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1) return NULL; - } len = index_length(self); @@ -373,6 +448,12 @@ return NULL; } + if (offset > INT_MAX) { + PyErr_SetString(PyExc_ValueError, + "currently only 2**31 revs supported"); + return NULL; + } + if (self->added == NULL) { self->added = PyList_New(0); if (self->added == NULL) @@ -382,6 +463,9 @@ if (PyList_Append(self->added, obj) == -1) return NULL; + if (self->nt) + nt_insert(self, node, (int)offset); + Py_RETURN_NONE; } @@ -401,26 +485,356 @@ free(self->offsets); self->offsets = NULL; } + if (self->nt) { + free(self->nt); + self->nt = NULL; + } } static PyObject *index_clearcaches(indexObject *self) { _index_clearcaches(self); + self->ntlength = self->ntcapacity = 0; + self->ntdepth = self->ntsplits = 0; + self->ntrev = -1; + self->ntlookups = self->ntmisses = 0; Py_RETURN_NONE; } -static int index_assign_subscript(indexObject *self, PyObject *item, - PyObject *value) +static PyObject *index_stats(indexObject *self) +{ + PyObject *obj = PyDict_New(); + + if (obj == NULL) + return NULL; + +#define istat(__n, __d) \ + if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \ + goto bail; + + if (self->added) { + Py_ssize_t len = PyList_GET_SIZE(self->added); + if (PyDict_SetItemString(obj, "index entries added", + PyInt_FromLong(len)) == -1) + goto bail; + } + + if (self->raw_length != self->length - 1) + istat(raw_length, "revs on disk"); + istat(length, "revs in memory"); + istat(ntcapacity, "node trie capacity"); + istat(ntdepth, "node trie depth"); + istat(ntlength, "node trie count"); + istat(ntlookups, "node trie lookups"); + istat(ntmisses, "node trie misses"); + istat(ntrev, "node trie last rev scanned"); + istat(ntsplits, "node trie splits"); + +#undef istat + + return obj; + +bail: + Py_XDECREF(obj); + return NULL; +} + +static inline int nt_level(const char *node, int level) +{ + int v = node[level>>1]; + if (!(level & 1)) + v >>= 4; + return v & 0xf; +} + +static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen) +{ + int level, off; + + if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0) + return -1; + + if (self->nt == NULL) + return -2; + + for (level = off = 0; level < nodelen; level++) { + int k = nt_level(node, level); + nodetree *n = &self->nt[off]; + int v = n->children[k]; + + if (v < 0) { + const char *n; + v = -v - 1; + n = index_node(self, v); + if (n == NULL) + return -2; + return memcmp(node, n, nodelen > 20 ? 20 : nodelen) + ? -2 : v; + } + if (v == 0) + return -2; + off = v; + } + return -2; +} + +static int nt_new(indexObject *self) +{ + if (self->ntlength == self->ntcapacity) { + self->ntcapacity *= 2; + self->nt = realloc(self->nt, + self->ntcapacity * sizeof(nodetree)); + if (self->nt == NULL) { + PyErr_SetString(PyExc_MemoryError, "out of memory"); + return -1; + } + memset(&self->nt[self->ntlength], 0, + sizeof(nodetree) * (self->ntcapacity - self->ntlength)); + } + return self->ntlength++; +} + +static int nt_insert(indexObject *self, const char *node, int rev) +{ + int level = 0; + int off = 0; + + while (level < 20) { + int k = nt_level(node, level); + nodetree *n; + int v; + + n = &self->nt[off]; + v = n->children[k]; + + if (v == 0) { + n->children[k] = -rev - 1; + return 0; + } + if (v < 0) { + const char *oldnode = index_node(self, -v - 1); + int noff; + + if (!oldnode || !memcmp(oldnode, node, 20)) { + n->children[k] = -rev - 1; + return 0; + } + noff = nt_new(self); + if (noff == -1) + return -1; + /* self->nt may have been changed by realloc */ + self->nt[off].children[k] = noff; + off = noff; + n = &self->nt[off]; + n->children[nt_level(oldnode, ++level)] = v; + if (level > self->ntdepth) + self->ntdepth = level; + self->ntsplits += 1; + } else { + level += 1; + off = v; + } + } + + return -1; +} + +/* + * Return values: + * + * -3: error (exception set) + * -2: not found (no exception set) + * rest: valid rev + */ +static int index_find_node(indexObject *self, + const char *node, Py_ssize_t nodelen) +{ + int rev; + + self->ntlookups++; + rev = nt_find(self, node, nodelen); + if (rev >= -1) + return rev; + + if (self->nt == NULL) { + self->ntcapacity = self->raw_length < 4 + ? 4 : self->raw_length / 2; + self->nt = calloc(self->ntcapacity, sizeof(nodetree)); + if (self->nt == NULL) { + PyErr_SetString(PyExc_MemoryError, "out of memory"); + return -3; + } + self->ntlength = 1; + self->ntrev = (int)index_length(self) - 1; + self->ntlookups = 1; + self->ntmisses = 0; + } + + /* + * For the first handful of lookups, we scan the entire index, + * and cache only the matching nodes. This optimizes for cases + * like "hg tip", where only a few nodes are accessed. + * + * After that, we cache every node we visit, using a single + * scan amortized over multiple lookups. This gives the best + * bulk performance, e.g. for "hg log". + */ + if (self->ntmisses++ < 4) { + for (rev = self->ntrev - 1; rev >= 0; rev--) { + const char *n = index_node(self, rev); + if (n == NULL) + return -2; + if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { + if (nt_insert(self, n, rev) == -1) + return -3; + break; + } + } + } else { + for (rev = self->ntrev - 1; rev >= 0; rev--) { + const char *n = index_node(self, rev); + if (n == NULL) + return -2; + if (nt_insert(self, n, rev) == -1) + return -3; + if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) { + break; + } + } + self->ntrev = rev; + } + + if (rev >= 0) + return rev; + return -2; +} + +static PyObject *raise_revlog_error(void) +{ + static PyObject *errclass; + PyObject *mod = NULL, *errobj; + + if (errclass == NULL) { + PyObject *dict; + + mod = PyImport_ImportModule("mercurial.error"); + if (mod == NULL) + goto classfail; + + dict = PyModule_GetDict(mod); + if (dict == NULL) + goto classfail; + + errclass = PyDict_GetItemString(dict, "RevlogError"); + if (errclass == NULL) { + PyErr_SetString(PyExc_SystemError, + "could not find RevlogError"); + goto classfail; + } + Py_INCREF(errclass); + } + + errobj = PyObject_CallFunction(errclass, NULL); + if (errobj == NULL) + return NULL; + PyErr_SetObject(errclass, errobj); + return errobj; + +classfail: + Py_XDECREF(mod); + return NULL; +} + +static PyObject *index_getitem(indexObject *self, PyObject *value) +{ + char *node; + Py_ssize_t nodelen; + int rev; + + if (PyInt_Check(value)) + return index_get(self, PyInt_AS_LONG(value)); + + if (PyString_AsStringAndSize(value, &node, &nodelen) == -1) + return NULL; + rev = index_find_node(self, node, nodelen); + if (rev >= -1) + return PyInt_FromLong(rev); + if (rev == -2) + raise_revlog_error(); + return NULL; +} + +static PyObject *index_m_get(indexObject *self, PyObject *args) +{ + char *node; + int nodelen, rev; + + if (!PyArg_ParseTuple(args, "s#", &node, &nodelen)) + return NULL; + + rev = index_find_node(self, node, nodelen); + if (rev == -3) + return NULL; + if (rev == -2) + Py_RETURN_NONE; + return PyInt_FromLong(rev); +} + +static int index_contains(indexObject *self, PyObject *value) +{ + char *node; + Py_ssize_t nodelen; + + if (PyInt_Check(value)) { + long rev = PyInt_AS_LONG(value); + return rev >= -1 && rev < index_length(self); + } + + if (!PyString_Check(value)) + return 0; + + node = PyString_AS_STRING(value); + nodelen = PyString_GET_SIZE(value); + + switch (index_find_node(self, node, nodelen)) { + case -3: + return -1; + case -2: + return 0; + default: + return 1; + } +} + +/* + * Invalidate any trie entries introduced by added revs. + */ +static void nt_invalidate_added(indexObject *self, Py_ssize_t start) +{ + Py_ssize_t i, len = PyList_GET_SIZE(self->added); + + for (i = start; i < len; i++) { + PyObject *tuple = PyList_GET_ITEM(self->added, i); + PyObject *node = PyTuple_GET_ITEM(tuple, 7); + + nt_insert(self, PyString_AS_STRING(node), -1); + } + + if (start == 0) { + Py_DECREF(self->added); + self->added = NULL; + } +} + +/* + * Delete a numeric range of revs, which must be at the end of the + * range, but exclude the sentinel nullid entry. + */ +static int index_slice_del(indexObject *self, PyObject *item) { Py_ssize_t start, stop, step, slicelength; Py_ssize_t length = index_length(self); - if (!PySlice_Check(item) || value != NULL) { - PyErr_SetString(PyExc_TypeError, - "revlog index only supports slice deletion"); - return -1; - } - if (PySlice_GetIndicesEx((PySliceObject*)item, length, &start, &stop, &step, &slicelength) < 0) return -1; @@ -449,20 +863,71 @@ return -1; } - if (start < self->length) { + if (start < self->length - 1) { + if (self->nt) { + Py_ssize_t i; + + for (i = start + 1; i < self->length - 1; i++) { + const char *node = index_node(self, i); + + if (node) + nt_insert(self, node, -1); + } + if (self->added) + nt_invalidate_added(self, 0); + if (self->ntrev > start) + self->ntrev = (int)start; + } self->length = start + 1; - if (self->added) { - Py_DECREF(self->added); - self->added = NULL; - } return 0; } - return PyList_SetSlice(self->added, start - self->length + 1, - PyList_GET_SIZE(self->added), - NULL); + if (self->nt) { + nt_invalidate_added(self, start - self->length + 1); + if (self->ntrev > start) + self->ntrev = (int)start; + } + return self->added + ? PyList_SetSlice(self->added, start - self->length + 1, + PyList_GET_SIZE(self->added), NULL) + : 0; } +/* + * Supported ops: + * + * slice deletion + * string assignment (extend node->rev mapping) + * string deletion (shrink node->rev mapping) + */ +static int index_assign_subscript(indexObject *self, PyObject *item, + PyObject *value) +{ + char *node; + Py_ssize_t nodelen; + long rev; + + if (PySlice_Check(item) && value == NULL) + return index_slice_del(self, item); + + if (node_check(item, &node, &nodelen) == -1) + return -1; + + if (value == NULL) + return self->nt ? nt_insert(self, node, -1) : 0; + rev = PyInt_AsLong(value); + if (rev > INT_MAX || rev < 0) { + if (!PyErr_Occurred()) + PyErr_SetString(PyExc_ValueError, "rev out of range"); + return -1; + } + return nt_insert(self, node, (int)rev); +} + +/* + * Find all RevlogNG entries in an index that has inline data. Update + * the optional "offsets" table with those entries. + */ static long inline_scan(indexObject *self, const char **offsets) { const char *data = PyString_AS_STRING(self->data); @@ -506,6 +971,11 @@ self->added = NULL; self->offsets = NULL; + self->nt = NULL; + self->ntlength = self->ntcapacity = 0; + self->ntdepth = self->ntsplits = 0; + self->ntlookups = self->ntmisses = 0; + self->ntrev = -1; Py_INCREF(self->data); if (self->inlined) { @@ -541,6 +1011,11 @@ PyTuple_GET_ITEM(args, 0)); } +static PyObject *index_nodemap(indexObject *self) +{ + return (PyObject *)self; +} + static void index_dealloc(indexObject *self) { _index_clearcaches(self); @@ -554,19 +1029,32 @@ 0, /* sq_concat */ 0, /* sq_repeat */ (ssizeargfunc)index_get, /* sq_item */ + 0, /* sq_slice */ + 0, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)index_contains, /* sq_contains */ }; static PyMappingMethods index_mapping_methods = { (lenfunc)index_length, /* mp_length */ - NULL, /* mp_subscript */ + (binaryfunc)index_getitem, /* mp_subscript */ (objobjargproc)index_assign_subscript, /* mp_ass_subscript */ }; static PyMethodDef index_methods[] = { {"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS, "clear the index caches"}, + {"get", (PyCFunction)index_m_get, METH_VARARGS, + "get an index entry"}, {"insert", (PyCFunction)index_insert, METH_VARARGS, "insert an index entry"}, + {"stats", (PyCFunction)index_stats, METH_NOARGS, + "stats for the index"}, + {NULL} /* Sentinel */ +}; + +static PyGetSetDef index_getset[] = { + {"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL}, {NULL} /* Sentinel */ }; @@ -601,7 +1089,7 @@ 0, /* tp_iternext */ index_methods, /* tp_methods */ 0, /* tp_members */ - 0, /* tp_getset */ + index_getset, /* tp_getset */ 0, /* tp_base */ 0, /* tp_dict */ 0, /* tp_descr_get */ @@ -613,10 +1101,10 @@ }; /* - * returns a tuple of the form (index, None, cache) with elements as + * returns a tuple of the form (index, index, cache) with elements as * follows: * - * index: an index object that lazily parses the RevlogNG records + * index: an index object that lazily parses RevlogNG records * cache: if data is inlined, a tuple (index_file_content, 0), else None * * added complications are for backwards compatibility @@ -651,6 +1139,8 @@ Py_INCREF(cache); } + Py_INCREF(idx); + tuple = Py_BuildValue("NN", idx, cache); if (!tuple) goto bail; @@ -674,8 +1164,6 @@ static void module_init(PyObject *mod) { - static const char nullid[20]; - if (PyType_Ready(&indexType) < 0) return; Py_INCREF(&indexType); @@ -710,4 +1198,3 @@ module_init(mod); } #endif - diff -r 1a420761fcb7 -r e8d37b78acfb mercurial/revlog.py --- a/mercurial/revlog.py Thu Apr 12 20:22:18 2012 -0500 +++ b/mercurial/revlog.py Thu Apr 12 14:05:59 2012 -0700 @@ -174,7 +174,7 @@ def parseindex(self, data, inline): # call the C implementation to parse the index data index, cache = parsers.parse_index2(data, inline) - return index, None, cache + return index, getattr(index, 'nodemap', None), cache def packentry(self, entry, node, version, rev): p = _pack(indexformatng, *entry) @@ -295,10 +295,21 @@ except KeyError: return False + def clearcaches(self): + try: + self._nodecache.clearcaches() + except AttributeError: + self._nodecache = {nullid: nullrev} + self._nodepos = None + def rev(self, node): try: return self._nodecache[node] + except RevlogError: + # parsers.c radix tree lookup failed + raise LookupError(node, self.indexfile, _('no node')) except KeyError: + # pure python cache lookup failed n = self._nodecache i = self.index p = self._nodepos diff -r 1a420761fcb7 -r e8d37b78acfb tests/test-parseindex2.py --- a/tests/test-parseindex2.py Thu Apr 12 20:22:18 2012 -0500 +++ b/tests/test-parseindex2.py Thu Apr 12 14:05:59 2012 -0700 @@ -110,6 +110,13 @@ if py_res_2 != c_res_2: print "Parse index result (no inlined data) differs!" + ix = parsers.parse_index2(data_inlined, True)[0] + for i, r in enumerate(ix): + if r[7] == nullid: + i = -1 + if ix[r[7]] != i: + print 'Reverse lookup inconsistent for %r' % r[7].encode('hex') + print "done" runtest()