diff mercurial/cext/bdiff.c @ 36146:29dd37a418aa

bdiff: write a native version of splitnewlines ./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile before: ! wall 0.309280 comb 0.350000 user 0.290000 sys 0.060000 (best of 32) ./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile after: ! wall 0.241572 comb 0.260000 user 0.240000 sys 0.020000 (best of 39) so it's about 20% faster. I hate Python. I wish we could usefully write this in Rust, but it doesn't look like that's realistic without using the cpython crate, which I'd still like to avoid. Differential Revision: https://phab.mercurial-scm.org/D1973
author Augie Fackler <augie@google.com>
date Thu, 25 Jan 2018 21:16:28 -0500
parents b4fdc6177b29
children 186c6df3a373
line wrap: on
line diff
--- a/mercurial/cext/bdiff.c	Mon Feb 12 16:09:31 2018 +0100
+++ b/mercurial/cext/bdiff.c	Thu Jan 25 21:16:28 2018 -0500
@@ -180,16 +180,66 @@
 	return result ? result : PyErr_NoMemory();
 }
 
+static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
+                          const char *source, Py_ssize_t len)
+{
+	PyObject *sliced = PyBytes_FromStringAndSize(source, len);
+	if (sliced == NULL)
+		return false;
+	PyList_SET_ITEM(list, destidx, sliced);
+	return true;
+}
+
+static PyObject *splitnewlines(PyObject *self, PyObject *args)
+{
+	const char *text;
+	Py_ssize_t nelts = 0, size, i, start = 0;
+	PyObject *result = NULL;
+
+	if (!PyArg_ParseTuple(args, "s#", &text, &size)) {
+		goto abort;
+	}
+	if (!size) {
+		return PyList_New(0);
+	}
+	/* This loops to size-1 because if the last byte is a newline,
+	 * we don't want to perform a split there. */
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			++nelts;
+		}
+	}
+	if ((result = PyList_New(nelts + 1)) == NULL)
+		goto abort;
+	nelts = 0;
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			if (!sliceintolist(result, nelts++, text + start,
+			                   i - start + 1))
+				goto abort;
+			start = i + 1;
+		}
+	}
+	if (!sliceintolist(result, nelts++, text + start, size - start))
+		goto abort;
+	return result;
+abort:
+	Py_XDECREF(result);
+	return NULL;
+}
+
 static char mdiff_doc[] = "Efficient binary diff.";
 
 static PyMethodDef methods[] = {
     {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
     {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
     {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
+    {"splitnewlines", splitnewlines, METH_VARARGS,
+     "like str.splitlines, but only split on newlines\n"},
     {NULL, NULL},
 };
 
-static const int version = 1;
+static const int version = 2;
 
 #ifdef IS_PY3K
 static struct PyModuleDef bdiff_module = {