Mercurial > hg-stable
changeset 36184:29dd37a418aa
bdiff: write a native version of splitnewlines
./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile before:
! wall 0.309280 comb 0.350000 user 0.290000 sys 0.060000 (best of 32)
./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile after:
! wall 0.241572 comb 0.260000 user 0.240000 sys 0.020000 (best of 39)
so it's about 20% faster. I hate Python. I wish we could usefully
write this in Rust, but it doesn't look like that's realistic without
using the cpython crate, which I'd still like to avoid.
Differential Revision: https://phab.mercurial-scm.org/D1973
author | Augie Fackler <augie@google.com> |
---|---|
date | Thu, 25 Jan 2018 21:16:28 -0500 |
parents | 4f3e989536c3 |
children | 6eb7d95f8970 |
files | mercurial/cext/bdiff.c mercurial/mdiff.py mercurial/policy.py mercurial/pure/bdiff.py |
diffstat | 4 files changed, 63 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/mercurial/cext/bdiff.c	Mon Feb 12 16:09:31 2018 +0100
+++ b/mercurial/cext/bdiff.c	Thu Jan 25 21:16:28 2018 -0500
@@ -180,16 +180,66 @@
 	return result ? result : PyErr_NoMemory();
 }
 
+static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
+                          const char *source, Py_ssize_t len)
+{
+	PyObject *sliced = PyBytes_FromStringAndSize(source, len);
+	if (sliced == NULL)
+		return false;
+	PyList_SET_ITEM(list, destidx, sliced);
+	return true;
+}
+
+static PyObject *splitnewlines(PyObject *self, PyObject *args)
+{
+	const char *text;
+	Py_ssize_t nelts = 0, size, i, start = 0;
+	PyObject *result = NULL;
+
+	if (!PyArg_ParseTuple(args, "s#", &text, &size)) {
+		goto abort;
+	}
+	if (!size) {
+		return PyList_New(0);
+	}
+	/* This loops to size-1 because if the last byte is a newline,
+	 * we don't want to perform a split there. */
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			++nelts;
+		}
+	}
+	if ((result = PyList_New(nelts + 1)) == NULL)
+		goto abort;
+	nelts = 0;
+	for (i = 0; i < size - 1; ++i) {
+		if (text[i] == '\n') {
+			if (!sliceintolist(result, nelts++, text + start,
+			                   i - start + 1))
+				goto abort;
+			start = i + 1;
+		}
+	}
+	if (!sliceintolist(result, nelts++, text + start, size - start))
+		goto abort;
+	return result;
+abort:
+	Py_XDECREF(result);
+	return NULL;
+}
+
 static char mdiff_doc[] = "Efficient binary diff.";
 
 static PyMethodDef methods[] = {
 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
 	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
 	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
+	{"splitnewlines", splitnewlines, METH_VARARGS,
+	 "like str.splitlines, but only split on newlines\n"},
 	{NULL, NULL},
 };
 
-static const int version = 1;
+static const int version = 2;
 
 #ifdef IS_PY3K
 static struct PyModuleDef bdiff_module = {
```
```diff
--- a/mercurial/mdiff.py	Mon Feb 12 16:09:31 2018 +0100
+++ b/mercurial/mdiff.py	Thu Jan 25 21:16:28 2018 -0500
@@ -29,16 +29,7 @@
 patches = mpatch.patches
 patchedsize = mpatch.patchedsize
 textdiff = bdiff.bdiff
-
-def splitnewlines(text):
-    '''like str.splitlines, but only split on newlines.'''
-    lines = [l + '\n' for l in text.split('\n')]
-    if lines:
-        if lines[-1] == '\n':
-            lines.pop()
-        else:
-            lines[-1] = lines[-1][:-1]
-    return lines
+splitnewlines = bdiff.splitnewlines
 
 class diffopts(object):
     '''context is the number of context lines
```
```diff
--- a/mercurial/policy.py	Mon Feb 12 16:09:31 2018 +0100
+++ b/mercurial/policy.py	Thu Jan 25 21:16:28 2018 -0500
@@ -71,7 +71,7 @@
 # keep in sync with "version" in C modules
 _cextversions = {
     (r'cext', r'base85'): 1,
-    (r'cext', r'bdiff'): 1,
+    (r'cext', r'bdiff'): 2,
     (r'cext', r'diffhelpers'): 1,
     (r'cext', r'mpatch'): 1,
     (r'cext', r'osutil'): 3,
```
```diff
--- a/mercurial/pure/bdiff.py	Mon Feb 12 16:09:31 2018 +0100
+++ b/mercurial/pure/bdiff.py	Thu Jan 25 21:16:28 2018 -0500
@@ -90,3 +90,13 @@
         text = re.sub('[ \t\r]+', ' ', text)
         text = text.replace(' \n', '\n')
     return text
+
+def splitnewlines(text):
+    '''like str.splitlines, but only split on newlines.'''
+    lines = [l + '\n' for l in text.split('\n')]
+    if lines:
+        if lines[-1] == '\n':
+            lines.pop()
+        else:
+            lines[-1] = lines[-1][:-1]
+    return lines
```