changeset 15530:eeac5e179243

mdiff: replace wsclean() regexps with C loops. On my system it reduces `hg annotate -w mercurial/commands.py` from 36s to less than 8s, to be compared with 6.3s when run without whitespace options.
author Patrick Mezard <pmezard@gmail.com>
date Fri, 18 Nov 2011 14:23:03 +0100
parents b35cf47286a6
children 80ec63090a7a
files mercurial/bdiff.c mercurial/mdiff.py mercurial/pure/bdiff.py tests/test-bdiff.py tests/test-bdiff.py.out
diffstat 5 files changed, 69 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/bdiff.c	Fri Nov 18 14:16:47 2011 +0100
+++ b/mercurial/bdiff.c	Fri Nov 18 14:23:03 2011 +0100
@@ -425,11 +425,55 @@
 	return result ? result : PyErr_NoMemory();
 }
 
+/*
+ * fixws(s, allws): return a copy of bytes s with whitespace normalized.
+ * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise, reduce
+ * whitespace sequences to a single space and trim remaining whitespace
+ * from end of lines. Returns NULL on bad arguments or allocation failure.
+ */
+static PyObject *fixws(PyObject *self, PyObject *args)
+{
+	PyObject *s, *result = NULL;
+	char allws, c;
+	const char *r;
+	Py_ssize_t i, rlen, wlen = 0; /* PyBytes_Size() returns Py_ssize_t */
+	char *w;
+
+	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
+		return NULL;
+	r = PyBytes_AsString(s);
+	rlen = PyBytes_Size(s);
+
+	/* malloc(0) may return NULL; always request at least one byte */
+	w = (char *)malloc(rlen ? rlen : 1);
+	if (!w)
+		goto nomem;
+
+	for (i = 0; i != rlen; i++) {
+		c = r[i];
+		if (c == ' ' || c == '\t' || c == '\r') {
+			if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
+				w[wlen++] = ' '; /* one space per run */
+		} else if (c == '\n' && !allws
+			  && wlen > 0 && w[wlen - 1] == ' ') {
+			w[wlen - 1] = '\n'; /* trim space before newline */
+		} else {
+			w[wlen++] = c;
+		}
+	}
+	result = PyBytes_FromStringAndSize(w, wlen);
+nomem:
+	free(w);
+	return result ? result : PyErr_NoMemory();
+}
+
+
 static char mdiff_doc[] = "Efficient binary diff.";
 
 static PyMethodDef methods[] = {
 	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
 	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
+	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
 	{NULL, NULL}
 };
 
--- a/mercurial/mdiff.py	Fri Nov 18 14:16:47 2011 +0100
+++ b/mercurial/mdiff.py	Fri Nov 18 14:23:03 2011 +0100
@@ -67,10 +67,9 @@
 
 def wsclean(opts, text, blank=True):
     if opts.ignorews:
-        text = re.sub('[ \t\r]+', '', text)
+        text = bdiff.fixws(text, 1)
     elif opts.ignorewsamount:
-        text = re.sub('[ \t\r]+', ' ', text)
-        text = text.replace(' \n', '\n')
+        text = bdiff.fixws(text, 0)
     if blank and opts.ignoreblanklines:
         text = re.sub('\n+', '\n', text).strip('\n')
     return text
--- a/mercurial/pure/bdiff.py	Fri Nov 18 14:16:47 2011 +0100
+++ b/mercurial/pure/bdiff.py	Fri Nov 18 14:23:03 2011 +0100
@@ -5,7 +5,7 @@
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
-import struct, difflib
+import struct, difflib, re
 
 def splitnewlines(text):
     '''like str.splitlines, but only split on newlines.'''
@@ -78,3 +78,10 @@
     d = _normalizeblocks(an, bn, d)
     return [(i, i + n, j, j + n) for (i, j, n) in d]
 
+def fixws(text, allws):
+    # allws true: strip every ' ', \t and \r; otherwise collapse each run
+    # of them to one space and drop the space left before a newline.
+    if not allws:
+        collapsed = re.sub('[ \t\r]+', ' ', text)
+        return collapsed.replace(' \n', '\n')
+    return re.sub('[ \t\r]+', '', text)
--- a/tests/test-bdiff.py	Fri Nov 18 14:16:47 2011 +0100
+++ b/tests/test-bdiff.py	Fri Nov 18 14:23:03 2011 +0100
@@ -50,3 +50,17 @@
 showdiff("x\n\nx\n\nx\n\nx\n\nz\n", "x\n\nx\n\ny\n\nx\n\ny\n\nx\n\nz\n")
 
 print "done"
+
+def testfixws(a, b, allws):  # report when fixws(a, allws) != expected b
+    got = bdiff.fixws(a, allws)
+    if got != b:
+        print "*** fixws", repr(a), repr(b), allws
+        print "got:"
+        print repr(got)
+
+testfixws(" \ta\r b\t\n", "ab\n", 1)
+testfixws(" \ta\r b\t\n", " a b\n", 0)
+testfixws("", "", 1)
+testfixws("", "", 0)
+
+print "done"
--- a/tests/test-bdiff.py.out	Fri Nov 18 14:16:47 2011 +0100
+++ b/tests/test-bdiff.py.out	Fri Nov 18 14:23:03 2011 +0100
@@ -21,3 +21,4 @@
 6 6 'y\n\n'
 9 9 'y\n\n'
 done
+done