Mercurial > hg
changeset 16665:e410be860393
revlog: speed up prefix matching against nodes
The radix tree already contains all the information we need to
determine whether a short string is an unambiguous node identifier.
We now make use of this information.
In a kernel tree, this improves the performance of
"hg log -q -r24bf01de75" from 0.27 seconds to 0.06.
author | Bryan O'Sullivan <bryano@fb.com> |
---|---|
date | Sat, 12 May 2012 10:55:08 +0200 |
parents | 5bc6edf71b39 |
children | 76c744e0c5bb |
files | mercurial/parsers.c mercurial/revlog.py |
diffstat | 2 files changed, 83 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/parsers.c Sat May 12 10:55:08 2012 +0200 +++ b/mercurial/parsers.c Sat May 12 10:55:08 2012 +0200 @@ -566,7 +566,7 @@ return -2; if (hex) - maxlevel = nodelen > 40 ? 40 : nodelen; + maxlevel = nodelen > 40 ? 40 : (int)nodelen; else maxlevel = nodelen > 20 ? 40 : ((int)nodelen * 2); @@ -795,6 +795,77 @@ return NULL; } +static int nt_partialmatch(indexObject *self, const char *node, + Py_ssize_t nodelen) +{ + int rev; + + if (nt_init(self) == -1) + return -3; + + if (self->ntrev > 0) { + /* ensure that the radix tree is fully populated */ + for (rev = self->ntrev - 1; rev >= 0; rev--) { + const char *n = index_node(self, rev); + if (n == NULL) + return -2; + if (nt_insert(self, n, rev) == -1) + return -3; + } + self->ntrev = rev; + } + + return nt_find(self, node, nodelen, 1); +} + +static PyObject *index_partialmatch(indexObject *self, PyObject *args) +{ + const char *fullnode; + int nodelen; + char *node; + int rev, i; + + if (!PyArg_ParseTuple(args, "s#", &node, &nodelen)) + return NULL; + + if (nodelen < 4) { + PyErr_SetString(PyExc_ValueError, "key too short"); + return NULL; + } + + if (nodelen > 40) + nodelen = 40; + + for (i = 0; i < nodelen; i++) + hexdigit(node, i); + if (PyErr_Occurred()) { + /* input contains non-hex characters */ + PyErr_Clear(); + Py_RETURN_NONE; + } + + rev = nt_partialmatch(self, node, nodelen); + + switch (rev) { + case -4: + raise_revlog_error(); + case -3: + return NULL; + case -2: + Py_RETURN_NONE; + case -1: + return PyString_FromStringAndSize(nullid, 20); + } + + fullnode = index_node(self, rev); + if (fullnode == NULL) { + PyErr_Format(PyExc_IndexError, + "could not access rev %d", rev); + return NULL; + } + return PyString_FromStringAndSize(fullnode, 20); +} + static PyObject *index_m_get(indexObject *self, PyObject *args) { char *node; @@ -1077,6 +1148,8 @@ "get an index entry"}, {"insert", (PyCFunction)index_insert, METH_VARARGS, "insert an index entry"}, + {"partialmatch", (PyCFunction)index_partialmatch, METH_VARARGS, + "match a potentially ambiguous node ID"}, {"stats", (PyCFunction)index_stats, METH_NOARGS, "stats for the index"}, {NULL} /* Sentinel */
--- a/mercurial/revlog.py Sat May 12 10:55:08 2012 +0200 +++ b/mercurial/revlog.py Sat May 12 10:55:08 2012 +0200 @@ -756,6 +756,15 @@ pass def _partialmatch(self, id): + try: + return self.index.partialmatch(id) + except RevlogError: + # parsers.c radix tree lookup gave multiple matches + raise LookupError(id, self.indexfile, _("ambiguous identifier")) + except (AttributeError, ValueError): + # we are pure python, or key was too short to search radix tree + pass + if id in self._pcache: return self._pcache[id]