changeset 36675:430fdb717549

bdiff: add a xdiffblocks method

This is similar to `bdiff.blocks`, but uses xdiff as the backend. The indent heuristic is turned on by default since it has little overhead and improves diff quality significantly.

Differential Revision: https://phab.mercurial-scm.org/D2602
author Jun Wu <quark@fb.com>
date Sat, 03 Mar 2018 12:39:14 -0800
parents c420792217c8
children c6a61298ac32
files mercurial/cext/bdiff.c mercurial/policy.py setup.py
diffstat 3 files changed, 84 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/cext/bdiff.c	Sat Mar 03 12:39:11 2018 -0800
+++ b/mercurial/cext/bdiff.c	Sat Mar 03 12:39:14 2018 -0800
@@ -17,6 +17,7 @@
 
 #include "bdiff.h"
 #include "bitmanipulation.h"
+#include "thirdparty/xdiff/xdiff.h"
 #include "util.h"
 
 static PyObject *blocks(PyObject *self, PyObject *args)
@@ -256,6 +257,64 @@
 	return NULL;
 }
 
+/* hunk callback: append the tuple (a1, a2, b1, b2) to the list in priv */
+static int hunk_consumer(long a1, long a2, long b1, long b2, void *priv)
+{
+	PyObject *rl = (PyObject *)priv;
+	PyObject *m = Py_BuildValue("llll", a1, a2, b1, b2);
+	int r;
+	if (!m)
+		return -1;
+	r = PyList_Append(rl, m); /* the list takes its own reference */
+	Py_DECREF(m);             /* so ours is dropped on success too */
+	return r;                 /* 0 on success, -1 on failure */
+}
+
+static PyObject *xdiffblocks(PyObject *self, PyObject *args)
+{
+	/* diff parameters: only the indent heuristic flag is set */
+	xpparam_t params = {
+	    XDF_INDENT_HEURISTIC, /* flags */
+	    NULL,                 /* anchors */
+	    0,                    /* anchors_nr */
+	};
+	/* emit configuration: hunk_consumer is the hunk callback */
+	xdemitconf_t emitconf = {
+	    0,                  /* ctxlen */
+	    0,                  /* interhunkctxlen */
+	    XDL_EMIT_BDIFFHUNK, /* flags */
+	    NULL,               /* find_func */
+	    NULL,               /* find_func_priv */
+	    hunk_consumer,      /* hunk_consume_func */
+	};
+	xdemitcb_t callback = {
+	    NULL, /* priv */
+	    NULL, /* outf */
+	};
+	mmfile_t left, right;
+	Py_ssize_t leftlen, rightlen;
+	PyObject *hunks;
+
+	if (!PyArg_ParseTuple(args, PY23("s#s#", "y#y#"), &left.ptr, &leftlen,
+	                      &right.ptr, &rightlen))
+		return NULL;
+
+	left.size = leftlen;
+	right.size = rightlen;
+
+	hunks = PyList_New(0);
+	if (hunks == NULL)
+		return PyErr_NoMemory();
+	callback.priv = hunks; /* presumably passed to hunk_consumer as priv */
+
+	if (xdl_diff(&left, &right, &params, &emitconf, &callback) == 0)
+		return hunks;
+
+	/* any non-zero xdl_diff result is reported as MemoryError */
+	Py_DECREF(hunks);
+	return PyErr_NoMemory();
+}
+
 static char mdiff_doc[] = "Efficient binary diff.";
 
 static PyMethodDef methods[] = {
@@ -264,10 +323,12 @@
     {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
     {"splitnewlines", splitnewlines, METH_VARARGS,
      "like str.splitlines, but only split on newlines\n"},
+    {"xdiffblocks", xdiffblocks, METH_VARARGS,
+     "find a list of matching lines using xdiff algorithm\n"},
     {NULL, NULL},
 };
 
-static const int version = 2;
+static const int version = 3;
 
 #ifdef IS_PY3K
 static struct PyModuleDef bdiff_module = {
--- a/mercurial/policy.py	Sat Mar 03 12:39:11 2018 -0800
+++ b/mercurial/policy.py	Sat Mar 03 12:39:14 2018 -0800
@@ -66,7 +66,7 @@
 # keep in sync with "version" in C modules
 _cextversions = {
     (r'cext', r'base85'): 1,
-    (r'cext', r'bdiff'): 2,
+    (r'cext', r'bdiff'): 3,
     (r'cext', r'diffhelpers'): 1,
     (r'cext', r'mpatch'): 1,
     (r'cext', r'osutil'): 3,
--- a/setup.py	Sat Mar 03 12:39:11 2018 -0800
+++ b/setup.py	Sat Mar 03 12:39:14 2018 -0800
@@ -847,14 +847,33 @@
 if sys.platform == 'darwin':
     osutil_ldflags += ['-framework', 'ApplicationServices']
 
+xdiff_srcs = [  # xdiff C sources appended to the bdiff extension's sources
+    'mercurial/thirdparty/xdiff/xdiffi.c',
+    'mercurial/thirdparty/xdiff/xemit.c',
+    'mercurial/thirdparty/xdiff/xmerge.c',
+    'mercurial/thirdparty/xdiff/xprepare.c',
+    'mercurial/thirdparty/xdiff/xutils.c',
+]
+
+xdiff_headers = [  # xdiff headers appended to the bdiff extension's depends
+    'mercurial/thirdparty/xdiff/xdiff.h',
+    'mercurial/thirdparty/xdiff/xdiffi.h',
+    'mercurial/thirdparty/xdiff/xemit.h',
+    'mercurial/thirdparty/xdiff/xinclude.h',
+    'mercurial/thirdparty/xdiff/xmacros.h',
+    'mercurial/thirdparty/xdiff/xprepare.h',
+    'mercurial/thirdparty/xdiff/xtypes.h',
+    'mercurial/thirdparty/xdiff/xutils.h',
+]
+
 extmodules = [
     Extension('mercurial.cext.base85', ['mercurial/cext/base85.c'],
               include_dirs=common_include_dirs,
               depends=common_depends),
     Extension('mercurial.cext.bdiff', ['mercurial/bdiff.c',
-                                       'mercurial/cext/bdiff.c'],
+                                       'mercurial/cext/bdiff.c'] + xdiff_srcs,
               include_dirs=common_include_dirs,
-              depends=common_depends + ['mercurial/bdiff.h']),
+              depends=common_depends + ['mercurial/bdiff.h'] + xdiff_headers),
     Extension('mercurial.cext.diffhelpers', ['mercurial/cext/diffhelpers.c'],
               include_dirs=common_include_dirs,
               depends=common_depends),