parse_manifest: rewrite to use memchr
memchr is usually better optimized than a simple byte-at-a-time for loop. With
gcc 4.4.6 and glibc 2.12 on x86-64, for a 20 MB manifest containing 200,000
files, parse_manifest goes from 0.116 seconds to 0.095 seconds.
--- a/mercurial/parsers.c Mon Sep 16 12:17:55 2013 -0700
+++ b/mercurial/parsers.c Fri Sep 06 23:47:59 2013 -0700
@@ -77,7 +77,7 @@
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
PyObject *mfdict, *fdict;
- char *str, *cur, *start, *zero;
+ char *str, *start, *end;
int len;
if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
@@ -86,30 +86,34 @@
&str, &len))
goto quit;
- for (start = cur = str, zero = NULL; cur < str + len; cur++) {
+ start = str;
+ end = str + len;
+ while (start < end) {
PyObject *file = NULL, *node = NULL;
PyObject *flags = NULL;
+ char *zero = NULL, *newline = NULL;
ptrdiff_t nlen;
- if (!*cur) {
- zero = cur;
- continue;
- }
- else if (*cur != '\n')
- continue;
-
+ zero = memchr(start, '\0', end - start);
if (!zero) {
PyErr_SetString(PyExc_ValueError,
"manifest entry has no separator");
goto quit;
}
+ newline = memchr(zero + 1, '\n', end - (zero + 1));
+ if (!newline) {
+ PyErr_SetString(PyExc_ValueError,
+ "manifest contains trailing garbage");
+ goto quit;
+ }
+
file = PyBytes_FromStringAndSize(start, zero - start);
if (!file)
goto bail;
- nlen = cur - zero - 1;
+ nlen = newline - zero - 1;
node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
if (!node)
@@ -128,8 +132,7 @@
if (PyDict_SetItem(mfdict, file, node) == -1)
goto bail;
- start = cur + 1;
- zero = NULL;
+ start = newline + 1;
Py_XDECREF(flags);
Py_XDECREF(node);
@@ -142,12 +145,6 @@
goto quit;
}
- if (len > 0 && *(cur - 1) != '\n') {
- PyErr_SetString(PyExc_ValueError,
- "manifest contains trailing garbage");
- goto quit;
- }
-
Py_INCREF(Py_None);
return Py_None;
quit: