changeset 19728:3daabd2da78b

parse_manifest: rewrite to use memchr. memchr is usually smarter than a simple for loop. With gcc 4.4.6 and glibc 2.12 on x86-64, for a 20 MB, 200,000-file manifest, parse_manifest goes from 0.116 seconds to 0.095 seconds.
author Siddharth Agarwal <sid0@fb.com>
date Fri, 06 Sep 2013 23:47:59 -0700
parents 3d07b4a2f743
children dfefb719eb92
files mercurial/parsers.c
diffstat 1 files changed, 15 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/parsers.c	Mon Sep 16 12:17:55 2013 -0700
+++ b/mercurial/parsers.c	Fri Sep 06 23:47:59 2013 -0700
@@ -77,7 +77,7 @@
 static PyObject *parse_manifest(PyObject *self, PyObject *args)
 {
 	PyObject *mfdict, *fdict;
-	char *str, *cur, *start, *zero;
+	char *str, *start, *end;
 	int len;
 
 	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
@@ -86,30 +86,34 @@
 			      &str, &len))
 		goto quit;
 
-	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
+	start = str;
+	end = str + len;
+	while (start < end) {
 		PyObject *file = NULL, *node = NULL;
 		PyObject *flags = NULL;
+		char *zero = NULL, *newline = NULL;
 		ptrdiff_t nlen;
 
-		if (!*cur) {
-			zero = cur;
-			continue;
-		}
-		else if (*cur != '\n')
-			continue;
-
+		zero = memchr(start, '\0', end - start);
 		if (!zero) {
 			PyErr_SetString(PyExc_ValueError,
 					"manifest entry has no separator");
 			goto quit;
 		}
 
+		newline = memchr(zero + 1, '\n', end - (zero + 1));
+		if (!newline) {
+			PyErr_SetString(PyExc_ValueError,
+					"manifest contains trailing garbage");
+			goto quit;
+		}
+
 		file = PyBytes_FromStringAndSize(start, zero - start);
 
 		if (!file)
 			goto bail;
 
-		nlen = cur - zero - 1;
+		nlen = newline - zero - 1;
 
 		node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
 		if (!node)
@@ -128,8 +132,7 @@
 		if (PyDict_SetItem(mfdict, file, node) == -1)
 			goto bail;
 
-		start = cur + 1;
-		zero = NULL;
+		start = newline + 1;
 
 		Py_XDECREF(flags);
 		Py_XDECREF(node);
@@ -142,12 +145,6 @@
 		goto quit;
 	}
 
-	if (len > 0 && *(cur - 1) != '\n') {
-		PyErr_SetString(PyExc_ValueError,
-				"manifest contains trailing garbage");
-		goto quit;
-	}
-
 	Py_INCREF(Py_None);
 	return Py_None;
 quit: