mdiff: replace wsclean() regexps with C loops
On my system this reduces the run time of:
hg annotate -w mercurial/commands.py
from 36s to less than 8s, compared with 6.3s when run without whitespace
options.
--- a/mercurial/bdiff.c Fri Nov 18 14:16:47 2011 +0100
+++ b/mercurial/bdiff.c Fri Nov 18 14:23:03 2011 +0100
@@ -425,11 +425,55 @@
return result ? result : PyErr_NoMemory();
}
+/*
+ * fixws(s, allws): if allws != 0, remove all whitespace (' ', \t and \r)
+ * from s. Otherwise reduce each whitespace run to a single space and drop
+ * that space when it directly precedes '\n'. Returns a new bytes object.
+ */
+static PyObject *fixws(PyObject *self, PyObject *args)
+{
+	PyObject *s, *result = NULL;
+	char allws, c;
+	const char *r;
+	int i, rlen, wlen = 0; /* NOTE(review): sizes > INT_MAX truncate */
+	char *w;
+
+	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
+		return NULL;
+	r = PyBytes_AsString(s);
+	rlen = PyBytes_Size(s);
+
+	w = (char *)malloc(rlen ? rlen : 1); /* malloc(0) may return NULL */
+	if (!w)
+		goto nomem;
+
+	for (i = 0; i != rlen; i++) {
+		c = r[i];
+		if (c == ' ' || c == '\t' || c == '\r') {
+			if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
+				w[wlen++] = ' '; /* only the first of a run */
+		} else if (c == '\n' && !allws
+			   && wlen > 0 && w[wlen - 1] == ' ') {
+			w[wlen - 1] = '\n'; /* overwrite the trailing space */
+		} else {
+			w[wlen++] = c;
+		}
+	}
+
+	result = PyBytes_FromStringAndSize(w, wlen); /* output never exceeds rlen */
+
+nomem:
+	free(w); /* free(NULL) is a no-op on the allocation-failure path */
+	return result ? result : PyErr_NoMemory();
+}
+
+
static char mdiff_doc[] = "Efficient binary diff.";
static PyMethodDef methods[] = {
{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
+ {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
{NULL, NULL}
};
--- a/mercurial/mdiff.py Fri Nov 18 14:16:47 2011 +0100
+++ b/mercurial/mdiff.py Fri Nov 18 14:23:03 2011 +0100
@@ -67,10 +67,9 @@
def wsclean(opts, text, blank=True):
if opts.ignorews:
- text = re.sub('[ \t\r]+', '', text)
+ text = bdiff.fixws(text, 1)
elif opts.ignorewsamount:
- text = re.sub('[ \t\r]+', ' ', text)
- text = text.replace(' \n', '\n')
+ text = bdiff.fixws(text, 0)
if blank and opts.ignoreblanklines:
text = re.sub('\n+', '\n', text).strip('\n')
return text
--- a/mercurial/pure/bdiff.py Fri Nov 18 14:16:47 2011 +0100
+++ b/mercurial/pure/bdiff.py Fri Nov 18 14:23:03 2011 +0100
@@ -5,7 +5,7 @@
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
-import struct, difflib
+import struct, difflib, re
def splitnewlines(text):
'''like str.splitlines, but only split on newlines.'''
@@ -78,3 +78,10 @@
d = _normalizeblocks(an, bn, d)
return [(i, i + n, j, j + n) for (i, j, n) in d]
+def fixws(text, allws):
+ if allws:
+ text = re.sub('[ \t\r]+', '', text)
+ else:
+ text = re.sub('[ \t\r]+', ' ', text)
+ text = text.replace(' \n', '\n')
+ return text
--- a/tests/test-bdiff.py Fri Nov 18 14:16:47 2011 +0100
+++ b/tests/test-bdiff.py Fri Nov 18 14:23:03 2011 +0100
@@ -50,3 +50,17 @@
showdiff("x\n\nx\n\nx\n\nx\n\nz\n", "x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n")
print "done"
+
+def testfixws(a, b, allws):
+ c = bdiff.fixws(a, allws)
+ if c != b:
+ print "*** fixws", repr(a), repr(b), allws
+ print "got:"
+ print repr(c)
+
+testfixws(" \ta\r b\t\n", "ab\n", 1) # allws=1: all spaces, tabs and CRs removed
+testfixws(" \ta\r b\t\n", " a b\n", 0) # allws=0: runs collapsed, none left before \n
+testfixws("", "", 1) # empty input must stay empty in both modes
+testfixws("", "", 0)
+
+print "done"
--- a/tests/test-bdiff.py.out Fri Nov 18 14:16:47 2011 +0100
+++ b/tests/test-bdiff.py.out Fri Nov 18 14:23:03 2011 +0100
@@ -21,3 +21,4 @@
6 6 'y\n\n'
9 9 'y\n\n'
done
+done