Mercurial > hg
changeset 19728:3daabd2da78b
parse_manifest: rewrite to use memchr
memchr is usually smarter than a simple for loop. With gcc 4.4.6 and glibc 2.12
on x86-64, for a 20 MB, 200,000 file manifest, parse_manifest goes from 0.116
seconds to 0.095 seconds.
author | Siddharth Agarwal <sid0@fb.com> |
---|---|
date | Fri, 06 Sep 2013 23:47:59 -0700 |
parents | 3d07b4a2f743 |
children | dfefb719eb92 |
files | mercurial/parsers.c |
diffstat | 1 files changed, 15 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/mercurial/parsers.c Mon Sep 16 12:17:55 2013 -0700
+++ b/mercurial/parsers.c Fri Sep 06 23:47:59 2013 -0700
@@ -77,7 +77,7 @@
 static PyObject *parse_manifest(PyObject *self, PyObject *args)
 {
 	PyObject *mfdict, *fdict;
-	char *str, *cur, *start, *zero;
+	char *str, *start, *end;
 	int len;
 
 	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
@@ -86,30 +86,34 @@
 			      &str, &len))
 		goto quit;
 
-	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
+	start = str;
+	end = str + len;
+	while (start < end) {
 		PyObject *file = NULL, *node = NULL;
 		PyObject *flags = NULL;
+		char *zero = NULL, *newline = NULL;
 		ptrdiff_t nlen;
 
-		if (!*cur) {
-			zero = cur;
-			continue;
-		}
-		else if (*cur != '\n')
-			continue;
-
+		zero = memchr(start, '\0', end - start);
 		if (!zero) {
 			PyErr_SetString(PyExc_ValueError,
 					"manifest entry has no separator");
 			goto quit;
 		}
 
+		newline = memchr(zero + 1, '\n', end - (zero + 1));
+		if (!newline) {
+			PyErr_SetString(PyExc_ValueError,
+					"manifest contains trailing garbage");
+			goto quit;
+		}
+
 		file = PyBytes_FromStringAndSize(start, zero - start);
 
 		if (!file)
 			goto bail;
 
-		nlen = cur - zero - 1;
+		nlen = newline - zero - 1;
 
 		node = unhexlify(zero + 1, nlen > 40 ? 40 : (int)nlen);
 		if (!node)
@@ -128,8 +132,7 @@
 		if (PyDict_SetItem(mfdict, file, node) == -1)
 			goto bail;
 
-		start = cur + 1;
-		zero = NULL;
+		start = newline + 1;
 
 		Py_XDECREF(flags);
 		Py_XDECREF(node);
@@ -142,12 +145,6 @@
 		goto quit;
 	}
 
-	if (len > 0 && *(cur - 1) != '\n') {
-		PyErr_SetString(PyExc_ValueError,
-				"manifest contains trailing garbage");
-		goto quit;
-	}
-
 	Py_INCREF(Py_None);
 	return Py_None;
 quit:
```