diff mercurial/parsers.c @ 16414:e8d37b78acfb

parsers: use base-16 trie for faster node->rev mapping This greatly speeds up node->rev lookups, with results that are often user-perceptible: for instance, "hg --time log" of the node associated with rev 1000 on a linux-2.6 repo improves from 0.3 seconds to 0.03. I have not found any instances of slowdowns. The new perfnodelookup command in contrib/perf.py demonstrates the speedup more dramatically, since it performs no I/O. For a single lookup, the new code is about 40x faster. These changes also prepare the ground for the possibility of further improving the performance of prefix-based node lookups.
author Bryan O'Sullivan <bryano@fb.com>
date Thu, 12 Apr 2012 14:05:59 -0700
parents ee163a9cf37c
children d126a0d16856
line wrap: on
line diff
--- a/mercurial/parsers.c	Thu Apr 12 20:22:18 2012 -0500
+++ b/mercurial/parsers.c	Thu Apr 12 14:05:59 2012 -0700
@@ -215,10 +215,27 @@
 }
 
 /*
- * A list-like object that decodes the contents of a RevlogNG index
- * file on demand. It has limited support for insert and delete at the
- * last element before the end.  The last entry is always a sentinel
- * nullid.
+ * A base-16 trie for fast node->rev mapping.
+ *
+ * Positive value is index of the next node in the trie
+ * Negative value is a leaf: -(rev + 1)
+ * Zero is empty
+ */
+typedef struct {
+	int children[16];
+} nodetree;
+
+/*
+ * This class has two behaviours.
+ *
+ * When used in a list-like way (with integer keys), we decode an
+ * entry in a RevlogNG index file on demand. Our last entry is a
+ * sentinel, always a nullid.  We have limited support for
+ * integer-keyed insert and delete, only at elements right before the
+ * sentinel.
+ *
+ * With string keys, we lazily perform a reverse mapping from node to
+ * rev, using a base-16 trie.
  */
 typedef struct {
 	PyObject_HEAD
@@ -229,10 +246,18 @@
 	Py_ssize_t raw_length; /* original number of elements */
 	Py_ssize_t length;     /* current number of elements */
 	PyObject *added;       /* populated on demand */
+	nodetree *nt;          /* base-16 trie */
+	int ntlength;          /* # nodes in use */
+	int ntcapacity;        /* # nodes allocated */
+	int ntdepth;           /* maximum depth of tree */
+	int ntsplits;          /* # splits performed */
+	int ntrev;             /* last rev scanned */
+	int ntlookups;         /* # lookups */
+	int ntmisses;          /* # lookups that miss the cache */
 	int inlined;
 } indexObject;
 
-static Py_ssize_t index_length(indexObject *self)
+static Py_ssize_t index_length(const indexObject *self)
 {
 	if (self->added == NULL)
 		return self->length;
@@ -240,6 +265,7 @@
 }
 
 static PyObject *nullentry;
+static const char nullid[20];
 
 static long inline_scan(indexObject *self, const char **offsets);
 
@@ -249,7 +275,27 @@
 static char *tuple_format = "kiiiiiis#";
 #endif
 
-/* RevlogNG format (all in big endian, data may be inlined):
+/*
+ * Return a pointer to the beginning of a RevlogNG record.
+ */
+static const char *index_deref(indexObject *self, Py_ssize_t pos)
+{
+	if (self->inlined && pos > 0) {
+		if (self->offsets == NULL) {
+			self->offsets = malloc(self->raw_length *
+					       sizeof(*self->offsets));
+			if (self->offsets == NULL)
+				return (const char *)PyErr_NoMemory();
+			inline_scan(self, self->offsets);
+		}
+		return self->offsets[pos];
+	}
+
+	return PyString_AS_STRING(self->data) + pos * 64;
+}
+
+/*
+ * RevlogNG format (all in big endian, data may be inlined):
  *    6 bytes: offset
  *    2 bytes: flags
  *    4 bytes: compressed length
@@ -270,7 +316,10 @@
 	Py_ssize_t length = index_length(self);
 	PyObject *entry;
 
-	if (pos >= length) {
+	if (pos < 0)
+		pos += length;
+
+	if (pos < 0 || pos >= length) {
 		PyErr_SetString(PyExc_IndexError, "revlog index out of range");
 		return NULL;
 	}
@@ -298,17 +347,9 @@
 			return PyErr_NoMemory();
 	}
 
-	if (self->inlined && pos > 0) {
-		if (self->offsets == NULL) {
-			self->offsets = malloc(self->raw_length *
-					       sizeof(*self->offsets));
-			if (self->offsets == NULL)
-				return PyErr_NoMemory();
-			inline_scan(self, self->offsets);
-		}
-		data = self->offsets[pos];
-	} else
-		data = PyString_AS_STRING(self->data) + pos * 64;
+	data = index_deref(self, pos);
+	if (data == NULL)
+		return NULL;
 
 	memcpy(decode, data, 8 * sizeof(uint32_t));
 
@@ -341,26 +382,60 @@
 	return entry;
 }
 
+/*
+ * Return the 20-byte SHA of the node corresponding to the given rev.
+ */
+static const char *index_node(indexObject *self, Py_ssize_t pos)
+{
+	Py_ssize_t length = index_length(self);
+	const char *data;
+
+	if (pos == length - 1)
+		return nullid;
+
+	if (pos >= length)
+		return NULL;
+
+	if (pos >= self->length - 1) {
+		PyObject *tuple, *str;
+		tuple = PyList_GET_ITEM(self->added, pos - self->length + 1);
+		str = PyTuple_GetItem(tuple, 7);
+		return str ? PyString_AS_STRING(str) : NULL;
+	}
+
+	data = index_deref(self, pos);
+	return data ? data + 32 : NULL;
+}
+
+static int nt_insert(indexObject *self, const char *node, int rev);
+
+static int node_check(PyObject *obj, char **node, Py_ssize_t *nodelen)
+{
+	if (PyString_AsStringAndSize(obj, node, nodelen) == -1)
+		return -1;
+	if (*nodelen == 20)
+		return 0;
+	PyErr_SetString(PyExc_ValueError, "20-byte hash required");
+	return -1;
+}
+
 static PyObject *index_insert(indexObject *self, PyObject *args)
 {
-	PyObject *obj, *node;
+	PyObject *obj;
+	char *node;
 	long offset;
-	Py_ssize_t len;
+	Py_ssize_t len, nodelen;
 
 	if (!PyArg_ParseTuple(args, "lO", &offset, &obj))
 		return NULL;
 
 	if (!PyTuple_Check(obj) || PyTuple_GET_SIZE(obj) != 8) {
-		PyErr_SetString(PyExc_ValueError, "8-tuple required");
+		PyErr_SetString(PyExc_TypeError, "8-tuple required");
 		return NULL;
 	}
 
-	node = PyTuple_GET_ITEM(obj, 7);
-	if (!PyString_Check(node) || PyString_GET_SIZE(node) != 20) {
-		PyErr_SetString(PyExc_ValueError,
-				"20-byte hash required as last element");
+	if (node_check(PyTuple_GET_ITEM(obj, 7), &node, &nodelen) == -1)
 		return NULL;
-	}
 
 	len = index_length(self);
 
@@ -373,6 +448,12 @@
 		return NULL;
 	}
 
+	if (offset > INT_MAX) {
+		PyErr_SetString(PyExc_ValueError,
+				"currently only 2**31 revs supported");
+		return NULL;
+	}
+
 	if (self->added == NULL) {
 		self->added = PyList_New(0);
 		if (self->added == NULL)
@@ -382,6 +463,9 @@
 	if (PyList_Append(self->added, obj) == -1)
 		return NULL;
 
+	if (self->nt)
+		nt_insert(self, node, (int)offset);
+
 	Py_RETURN_NONE;
 }
 
@@ -401,26 +485,356 @@
 		free(self->offsets);
 		self->offsets = NULL;
 	}
+	if (self->nt) {
+		free(self->nt);
+		self->nt = NULL;
+	}
 }
 
 static PyObject *index_clearcaches(indexObject *self)
 {
 	_index_clearcaches(self);
+	self->ntlength = self->ntcapacity = 0;
+	self->ntdepth = self->ntsplits = 0;
+	self->ntrev = -1;
+	self->ntlookups = self->ntmisses = 0;
 	Py_RETURN_NONE;
 }
 
-static int index_assign_subscript(indexObject *self, PyObject *item,
-				  PyObject *value)
+static PyObject *index_stats(indexObject *self)
+{
+	PyObject *obj = PyDict_New();
+
+	if (obj == NULL)
+		return NULL;
+
+#define istat(__n, __d) \
+	if (PyDict_SetItemString(obj, __d, PyInt_FromLong(self->__n)) == -1) \
+		goto bail;
+
+	if (self->added) {
+		Py_ssize_t len = PyList_GET_SIZE(self->added);
+		if (PyDict_SetItemString(obj, "index entries added",
+					 PyInt_FromLong(len)) == -1)
+			goto bail;
+	}
+
+	if (self->raw_length != self->length - 1)
+		istat(raw_length, "revs on disk");
+	istat(length, "revs in memory");
+	istat(ntcapacity, "node trie capacity");
+	istat(ntdepth, "node trie depth");
+	istat(ntlength, "node trie count");
+	istat(ntlookups, "node trie lookups");
+	istat(ntmisses, "node trie misses");
+	istat(ntrev, "node trie last rev scanned");
+	istat(ntsplits, "node trie splits");
+
+#undef istat
+
+	return obj;
+
+bail:
+	Py_XDECREF(obj);
+	return NULL;
+}
+
+static inline int nt_level(const char *node, int level)
+{
+	int v = node[level>>1];
+	if (!(level & 1))
+		v >>= 4;
+	return v & 0xf;
+}
+
+static int nt_find(indexObject *self, const char *node, Py_ssize_t nodelen)
+{
+	int level, off;
+
+	if (nodelen == 20 && node[0] == '\0' && memcmp(node, nullid, 20) == 0)
+		return -1;
+
+	if (self->nt == NULL)
+		return -2;
+
+	for (level = off = 0; level < nodelen; level++) {
+		int k = nt_level(node, level);
+		nodetree *n = &self->nt[off];
+		int v = n->children[k];
+
+		if (v < 0) {
+			const char *n;
+			v = -v - 1;
+			n = index_node(self, v);
+			if (n == NULL)
+				return -2;
+			return memcmp(node, n, nodelen > 20 ? 20 : nodelen)
+				? -2 : v;
+		}
+		if (v == 0)
+			return -2;
+		off = v;
+	}
+	return -2;
+}
+
+static int nt_new(indexObject *self)
+{
+	if (self->ntlength == self->ntcapacity) {
+		self->ntcapacity *= 2;
+		self->nt = realloc(self->nt,
+				   self->ntcapacity * sizeof(nodetree));
+		if (self->nt == NULL) {
+			PyErr_SetString(PyExc_MemoryError, "out of memory");
+			return -1;
+		}
+		memset(&self->nt[self->ntlength], 0,
+		       sizeof(nodetree) * (self->ntcapacity - self->ntlength));
+	}
+	return self->ntlength++;
+}
+
+static int nt_insert(indexObject *self, const char *node, int rev)
+{
+	int level = 0;
+	int off = 0;
+
+	while (level < 20) {
+		int k = nt_level(node, level);
+		nodetree *n;
+		int v;
+
+		n = &self->nt[off];
+		v = n->children[k];
+
+		if (v == 0) {
+			n->children[k] = -rev - 1;
+			return 0;
+		}
+		if (v < 0) {
+			const char *oldnode = index_node(self, -v - 1);
+			int noff;
+
+			if (!oldnode || !memcmp(oldnode, node, 20)) {
+				n->children[k] = -rev - 1;
+				return 0;
+			}
+			noff = nt_new(self);
+			if (noff == -1)
+				return -1;
+			/* self->nt may have been changed by realloc */
+			self->nt[off].children[k] = noff;
+			off = noff;
+			n = &self->nt[off];
+			n->children[nt_level(oldnode, ++level)] = v;
+			if (level > self->ntdepth)
+				self->ntdepth = level;
+			self->ntsplits += 1;
+		} else {
+			level += 1;
+			off = v;
+		}
+	}
+
+	return -1;
+}
+
+/*
+ * Return values:
+ *
+ *   -3: error (exception set)
+ *   -2: not found (no exception set)
+ * rest: valid rev
+ */
+static int index_find_node(indexObject *self,
+			   const char *node, Py_ssize_t nodelen)
+{
+	int rev;
+
+	self->ntlookups++;
+	rev = nt_find(self, node, nodelen);
+	if (rev >= -1)
+		return rev;
+
+	if (self->nt == NULL) {
+		self->ntcapacity = self->raw_length < 4
+			? 4 : self->raw_length / 2;
+		self->nt = calloc(self->ntcapacity, sizeof(nodetree));
+		if (self->nt == NULL) {
+			PyErr_SetString(PyExc_MemoryError, "out of memory");
+			return -3;
+		}
+		self->ntlength = 1;
+		self->ntrev = (int)index_length(self) - 1;
+		self->ntlookups = 1;
+		self->ntmisses = 0;
+	}
+
+	/*
+	 * For the first handful of lookups, we scan the entire index,
+	 * and cache only the matching nodes. This optimizes for cases
+	 * like "hg tip", where only a few nodes are accessed.
+	 *
+	 * After that, we cache every node we visit, using a single
+	 * scan amortized over multiple lookups.  This gives the best
+	 * bulk performance, e.g. for "hg log".
+	 */
+	if (self->ntmisses++ < 4) {
+		for (rev = self->ntrev - 1; rev >= 0; rev--) {
+			const char *n = index_node(self, rev);
+			if (n == NULL)
+				return -2;
+			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
+				if (nt_insert(self, n, rev) == -1)
+					return -3;
+				break;
+			}
+		}
+	} else {
+		for (rev = self->ntrev - 1; rev >= 0; rev--) {
+			const char *n = index_node(self, rev);
+			if (n == NULL)
+				return -2;
+			if (nt_insert(self, n, rev) == -1)
+				return -3;
+			if (memcmp(node, n, nodelen > 20 ? 20 : nodelen) == 0) {
+				break;
+			}
+		}
+		self->ntrev = rev;
+	}
+
+	if (rev >= 0)
+		return rev;
+	return -2;
+}
+
+static PyObject *raise_revlog_error(void)
+{
+	static PyObject *errclass;
+	PyObject *mod = NULL, *errobj;
+
+	if (errclass == NULL) {
+		PyObject *dict;
+
+		mod = PyImport_ImportModule("mercurial.error");
+		if (mod == NULL)
+			goto classfail;
+
+		dict = PyModule_GetDict(mod);
+		if (dict == NULL)
+			goto classfail;
+
+		errclass = PyDict_GetItemString(dict, "RevlogError");
+		if (errclass == NULL) {
+			PyErr_SetString(PyExc_SystemError,
+					"could not find RevlogError");
+			goto classfail;
+		}
+		Py_INCREF(errclass);
+	}
+
+	errobj = PyObject_CallFunction(errclass, NULL);
+	if (errobj == NULL)
+		return NULL;
+	PyErr_SetObject(errclass, errobj);
+	return errobj;
+
+classfail:
+	Py_XDECREF(mod);
+	return NULL;
+}
+
+static PyObject *index_getitem(indexObject *self, PyObject *value)
+{
+	char *node;
+	Py_ssize_t nodelen;
+	int rev;
+
+	if (PyInt_Check(value))
+		return index_get(self, PyInt_AS_LONG(value));
+
+	if (PyString_AsStringAndSize(value, &node, &nodelen) == -1)
+		return NULL;
+	rev = index_find_node(self, node, nodelen);
+	if (rev >= -1)
+		return PyInt_FromLong(rev);
+	if (rev == -2)
+		raise_revlog_error();
+	return NULL;
+}
+
+static PyObject *index_m_get(indexObject *self, PyObject *args)
+{
+	char *node;
+	int nodelen, rev;
+
+	if (!PyArg_ParseTuple(args, "s#", &node, &nodelen))
+		return NULL;
+
+	rev = index_find_node(self, node, nodelen);
+	if (rev ==  -3)
+		return NULL;
+	if (rev == -2)
+		Py_RETURN_NONE;
+	return PyInt_FromLong(rev);
+}
+
+static int index_contains(indexObject *self, PyObject *value)
+{
+	char *node;
+	Py_ssize_t nodelen;
+
+	if (PyInt_Check(value)) {
+		long rev = PyInt_AS_LONG(value);
+		return rev >= -1 && rev < index_length(self);
+	}
+
+	if (!PyString_Check(value))
+		return 0;
+
+	node = PyString_AS_STRING(value);
+	nodelen = PyString_GET_SIZE(value);
+
+	switch (index_find_node(self, node, nodelen)) {
+	case -3:
+		return -1;
+	case -2:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+/*
+ * Invalidate any trie entries introduced by added revs.
+ */
+static void nt_invalidate_added(indexObject *self, Py_ssize_t start)
+{
+	Py_ssize_t i, len = PyList_GET_SIZE(self->added);
+
+	for (i = start; i < len; i++) {
+		PyObject *tuple = PyList_GET_ITEM(self->added, i);
+		PyObject *node = PyTuple_GET_ITEM(tuple, 7);
+
+		nt_insert(self, PyString_AS_STRING(node), -1);
+	}
+
+	if (start == 0) {
+		Py_DECREF(self->added);
+		self->added = NULL;
+	}
+}
+
+/*
+ * Delete a numeric range of revs, which must be at the end of the
+ * range, but exclude the sentinel nullid entry.
+ */
+static int index_slice_del(indexObject *self, PyObject *item)
 {
 	Py_ssize_t start, stop, step, slicelength;
 	Py_ssize_t length = index_length(self);
 
-	if (!PySlice_Check(item) || value != NULL) {
-		PyErr_SetString(PyExc_TypeError,
-				"revlog index only supports slice deletion");
-		return -1;
-	}
-
 	if (PySlice_GetIndicesEx((PySliceObject*)item, length,
 				 &start, &stop, &step, &slicelength) < 0)
 		return -1;
@@ -449,20 +863,71 @@
 		return -1;
 	}
 
-	if (start < self->length) {
+	if (start < self->length - 1) {
+		if (self->nt) {
+			Py_ssize_t i;
+
+			for (i = start + 1; i < self->length - 1; i++) {
+				const char *node = index_node(self, i);
+
+				if (node)
+					nt_insert(self, node, -1);
+			}
+			if (self->added)
+				nt_invalidate_added(self, 0);
+			if (self->ntrev > start)
+				self->ntrev = (int)start;
+		}
 		self->length = start + 1;
-		if (self->added) {
-			Py_DECREF(self->added);
-			self->added = NULL;
-		}
 		return 0;
 	}
 
-	return PyList_SetSlice(self->added, start - self->length + 1,
-			       PyList_GET_SIZE(self->added),
-			       NULL);
+	if (self->nt) {
+		nt_invalidate_added(self, start - self->length + 1);
+		if (self->ntrev > start)
+			self->ntrev = (int)start;
+	}
+	return self->added
+		? PyList_SetSlice(self->added, start - self->length + 1,
+				  PyList_GET_SIZE(self->added), NULL)
+		: 0;
 }
 
+/*
+ * Supported ops:
+ *
+ * slice deletion
+ * string assignment (extend node->rev mapping)
+ * string deletion (shrink node->rev mapping)
+ */
+static int index_assign_subscript(indexObject *self, PyObject *item,
+				  PyObject *value)
+{
+	char *node;
+	Py_ssize_t nodelen;
+	long rev;
+
+	if (PySlice_Check(item) && value == NULL)
+		return index_slice_del(self, item);
+
+	if (node_check(item, &node, &nodelen) == -1)
+		return -1;
+
+	if (value == NULL)
+		return self->nt ? nt_insert(self, node, -1) : 0;
+	rev = PyInt_AsLong(value);
+	if (rev > INT_MAX || rev < 0) {
+		if (!PyErr_Occurred())
+			PyErr_SetString(PyExc_ValueError, "rev out of range");
+		return -1;
+	}
+	return nt_insert(self, node, (int)rev);
+}
+
+/*
+ * Find all RevlogNG entries in an index that has inline data. Update
+ * the optional "offsets" table with those entries.
+ */
 static long inline_scan(indexObject *self, const char **offsets)
 {
 	const char *data = PyString_AS_STRING(self->data);
@@ -506,6 +971,11 @@
 
 	self->added = NULL;
 	self->offsets = NULL;
+	self->nt = NULL;
+	self->ntlength = self->ntcapacity = 0;
+	self->ntdepth = self->ntsplits = 0;
+	self->ntlookups = self->ntmisses = 0;
+	self->ntrev = -1;
 	Py_INCREF(self->data);
 
 	if (self->inlined) {
@@ -541,6 +1011,11 @@
 			       PyTuple_GET_ITEM(args, 0));
 }
 
+static PyObject *index_nodemap(indexObject *self)
+{
+	return (PyObject *)self;
+}
+
 static void index_dealloc(indexObject *self)
 {
 	_index_clearcaches(self);
@@ -554,19 +1029,32 @@
 	0,                       /* sq_concat */
 	0,                       /* sq_repeat */
 	(ssizeargfunc)index_get, /* sq_item */
+	0,                       /* sq_slice */
+	0,                       /* sq_ass_item */
+	0,                       /* sq_ass_slice */
+	(objobjproc)index_contains, /* sq_contains */
 };
 
 static PyMappingMethods index_mapping_methods = {
 	(lenfunc)index_length,                 /* mp_length */
-	NULL,                                  /* mp_subscript */
+	(binaryfunc)index_getitem,             /* mp_subscript */
 	(objobjargproc)index_assign_subscript, /* mp_ass_subscript */
 };
 
 static PyMethodDef index_methods[] = {
 	{"clearcaches", (PyCFunction)index_clearcaches, METH_NOARGS,
 	 "clear the index caches"},
+	{"get", (PyCFunction)index_m_get, METH_VARARGS,
+	 "get an index entry"},
 	{"insert", (PyCFunction)index_insert, METH_VARARGS,
 	 "insert an index entry"},
+	{"stats", (PyCFunction)index_stats, METH_NOARGS,
+	 "stats for the index"},
+	{NULL} /* Sentinel */
+};
+
+static PyGetSetDef index_getset[] = {
+	{"nodemap", (getter)index_nodemap, NULL, "nodemap", NULL},
 	{NULL} /* Sentinel */
 };
 
@@ -601,7 +1089,7 @@
 	0,                         /* tp_iternext */
 	index_methods,             /* tp_methods */
 	0,                         /* tp_members */
-	0,                         /* tp_getset */
+	index_getset,              /* tp_getset */
 	0,                         /* tp_base */
 	0,                         /* tp_dict */
 	0,                         /* tp_descr_get */
@@ -613,10 +1101,10 @@
 };
 
 /*
- * returns a tuple of the form (index, None, cache) with elements as
+ * returns a tuple of the form (index, index, cache) with elements as
  * follows:
  *
- * index: an index object that lazily parses the RevlogNG records
+ * index: an index object that lazily parses RevlogNG records
  * cache: if data is inlined, a tuple (index_file_content, 0), else None
  *
  * added complications are for backwards compatibility
@@ -651,6 +1139,8 @@
 		Py_INCREF(cache);
 	}
 
+	Py_INCREF(idx);
+
 	tuple = Py_BuildValue("NN", idx, cache);
 	if (!tuple)
 		goto bail;
@@ -674,8 +1164,6 @@
 
 static void module_init(PyObject *mod)
 {
-	static const char nullid[20];
-
 	if (PyType_Ready(&indexType) < 0)
 		return;
 	Py_INCREF(&indexType);
@@ -710,4 +1198,3 @@
 	module_init(mod);
 }
 #endif
-