shortest: use nodetree for finding shortest node within revset
This speeds up `hg log -T '{shortest(node,1)}\n'` in my repo from 12s
to 4.5s. That's very close to the 4.1s it takes without the
disambiguation revset configured. My repo has 69.5k revisions, of
which 550 were in the configured revset ("not public()").
Differential Revision: https://phab.mercurial-scm.org/D4120
--- a/mercurial/cext/parsers.c Mon Aug 20 15:57:03 2018 -0700
+++ b/mercurial/cext/parsers.c Sun Aug 05 00:42:07 2018 -0700
@@ -713,7 +713,7 @@
void manifest_module_init(PyObject *mod);
void revlog_module_init(PyObject *mod);
-static const int version = 8;
+static const int version = 9;
static void module_init(PyObject *mod)
{
--- a/mercurial/cext/revlog.c Mon Aug 20 15:57:03 2018 -0700
+++ b/mercurial/cext/revlog.c Sun Aug 05 00:42:07 2018 -0700
@@ -1087,6 +1087,23 @@
return -1;
}
+static PyObject *nt_insert_py(nodetree *self, PyObject *args)
+{
+ Py_ssize_t rev;
+ const char *node;
+ if (!PyArg_ParseTuple(args, "n", &rev))
+ return NULL;
+ const Py_ssize_t length = index_length(self->index);
+ if (rev < 0 || rev >= length) {
+ PyErr_SetString(PyExc_ValueError, "revlog index out of range");
+ return NULL;
+ }
+ node = index_node_existing(self->index, rev);
+ if (nt_insert(self, node, rev) == -1)
+ return NULL;
+ Py_RETURN_NONE;
+}
+
static int nt_delete_node(nodetree *self, const char *node)
{
/* rev==-2 happens to get encoded as 0, which is interpreted as not set */
@@ -1181,6 +1198,27 @@
return -3;
}
+static PyObject *nt_shortest_py(nodetree *self, PyObject *args)
+{
+ PyObject *val;
+ char *node;
+ int length;
+
+ if (!PyArg_ParseTuple(args, "O", &val))
+ return NULL;
+ if (node_check(val, &node) == -1)
+ return NULL;
+
+ length = nt_shortest(self, node);
+ if (length == -3)
+ return NULL;
+ if (length == -2) {
+ raise_revlog_error();
+ return NULL;
+ }
+ return PyInt_FromLong(length);
+}
+
static void nt_dealloc(nodetree *self)
{
Py_XDECREF(self->index);
@@ -1189,6 +1227,14 @@
PyObject_Del(self);
}
+static PyMethodDef nt_methods[] = {
+ {"insert", (PyCFunction)nt_insert_py, METH_VARARGS,
+ "insert an index entry"},
+ {"shortest", (PyCFunction)nt_shortest_py, METH_VARARGS,
+ "find length of shortest hex nodeid of a binary ID"},
+ {NULL} /* Sentinel */
+};
+
static PyTypeObject nodetreeType = {
PyVarObject_HEAD_INIT(NULL, 0) /* header */
"parsers.nodetree", /* tp_name */
@@ -1217,7 +1263,7 @@
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
- 0, /* tp_methods */
+ nt_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
--- a/mercurial/policy.py Mon Aug 20 15:57:03 2018 -0700
+++ b/mercurial/policy.py Sun Aug 05 00:42:07 2018 -0700
@@ -69,7 +69,7 @@
(r'cext', r'bdiff'): 3,
(r'cext', r'mpatch'): 1,
(r'cext', r'osutil'): 4,
- (r'cext', r'parsers'): 8,
+ (r'cext', r'parsers'): 9,
}
# map import request to other package or module
--- a/mercurial/scmutil.py Mon Aug 20 15:57:03 2018 -0700
+++ b/mercurial/scmutil.py Sun Aug 05 00:42:07 2018 -0700
@@ -34,6 +34,7 @@
obsutil,
pathutil,
phases,
+ policy,
pycompat,
revsetlang,
similar,
@@ -52,6 +53,8 @@
else:
from . import scmposix as scmplatform
+parsers = policy.importmod(r'parsers')
+
termsize = scmplatform.termsize
class status(tuple):
@@ -514,6 +517,24 @@
cache['disambiguationrevset'] = revs
if cl.rev(node) in revs:
hexnode = hex(node)
+ nodetree = None
+ if cache is not None:
+ nodetree = cache.get('disambiguationnodetree')
+ if not nodetree:
+ try:
+ nodetree = parsers.nodetree(cl.index, len(revs))
+ except AttributeError:
+ # no native nodetree
+ pass
+ else:
+ for r in revs:
+ nodetree.insert(r)
+ if cache is not None:
+ cache['disambiguationnodetree'] = nodetree
+ if nodetree is not None:
+ length = max(nodetree.shortest(node), minlength)
+ prefix = hexnode[:length]
+ return disambiguate(prefix)
for length in range(minlength, len(hexnode) + 1):
matches = []
prefix = hexnode[:length]