comparison mercurial/cext/bdiff.c @ 32369:3b88a7fa97d8

bdiff: switch to policy importer # no-check-commit
author Yuya Nishihara <yuya@tcha.org>
date Sat, 13 Aug 2016 12:12:50 +0900
parents mercurial/bdiff_module.c@4195b84940e9
children b90e8da190da
comparison
equal deleted inserted replaced
32368:008d37c4d783 32369:3b88a7fa97d8
1 /*
2 bdiff.c - efficient binary diff extension for Mercurial
3
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
5
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
8
9 Based roughly on Python difflib
10 */
11
12 #define PY_SSIZE_T_CLEAN
13 #include <Python.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <limits.h>
17
18 #include "bdiff.h"
19 #include "bitmanipulation.h"
20 #include "util.h"
21
22
23 static PyObject *blocks(PyObject *self, PyObject *args)
24 {
25 PyObject *sa, *sb, *rl = NULL, *m;
26 struct bdiff_line *a, *b;
27 struct bdiff_hunk l, *h;
28 int an, bn, count, pos = 0;
29
30 l.next = NULL;
31
32 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
33 return NULL;
34
35 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
36 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
37
38 if (!a || !b)
39 goto nomem;
40
41 count = bdiff_diff(a, an, b, bn, &l);
42 if (count < 0)
43 goto nomem;
44
45 rl = PyList_New(count);
46 if (!rl)
47 goto nomem;
48
49 for (h = l.next; h; h = h->next) {
50 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
51 PyList_SetItem(rl, pos, m);
52 pos++;
53 }
54
55 nomem:
56 free(a);
57 free(b);
58 bdiff_freehunks(l.next);
59 return rl ? rl : PyErr_NoMemory();
60 }
61
62 static PyObject *bdiff(PyObject *self, PyObject *args)
63 {
64 char *sa, *sb, *rb, *ia, *ib;
65 PyObject *result = NULL;
66 struct bdiff_line *al, *bl;
67 struct bdiff_hunk l, *h;
68 int an, bn, count;
69 Py_ssize_t len = 0, la, lb, li = 0, lcommon = 0, lmax;
70 PyThreadState *_save;
71
72 l.next = NULL;
73
74 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
75 return NULL;
76
77 if (la > UINT_MAX || lb > UINT_MAX) {
78 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
79 return NULL;
80 }
81
82 _save = PyEval_SaveThread();
83
84 lmax = la > lb ? lb : la;
85 for (ia = sa, ib = sb;
86 li < lmax && *ia == *ib;
87 ++li, ++ia, ++ib)
88 if (*ia == '\n')
89 lcommon = li + 1;
90 /* we can almost add: if (li == lmax) lcommon = li; */
91
92 an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
93 bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
94 if (!al || !bl)
95 goto nomem;
96
97 count = bdiff_diff(al, an, bl, bn, &l);
98 if (count < 0)
99 goto nomem;
100
101 /* calculate length of output */
102 la = lb = 0;
103 for (h = l.next; h; h = h->next) {
104 if (h->a1 != la || h->b1 != lb)
105 len += 12 + bl[h->b1].l - bl[lb].l;
106 la = h->a2;
107 lb = h->b2;
108 }
109 PyEval_RestoreThread(_save);
110 _save = NULL;
111
112 result = PyBytes_FromStringAndSize(NULL, len);
113
114 if (!result)
115 goto nomem;
116
117 /* build binary patch */
118 rb = PyBytes_AsString(result);
119 la = lb = 0;
120
121 for (h = l.next; h; h = h->next) {
122 if (h->a1 != la || h->b1 != lb) {
123 len = bl[h->b1].l - bl[lb].l;
124 putbe32((uint32_t)(al[la].l + lcommon - al->l), rb);
125 putbe32((uint32_t)(al[h->a1].l + lcommon - al->l), rb + 4);
126 putbe32((uint32_t)len, rb + 8);
127 memcpy(rb + 12, bl[lb].l, len);
128 rb += 12 + len;
129 }
130 la = h->a2;
131 lb = h->b2;
132 }
133
134 nomem:
135 if (_save)
136 PyEval_RestoreThread(_save);
137 free(al);
138 free(bl);
139 bdiff_freehunks(l.next);
140 return result ? result : PyErr_NoMemory();
141 }
142
143 /*
144 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
145 * reduce whitespace sequences to a single space and trim remaining whitespace
146 * from end of lines.
147 */
148 static PyObject *fixws(PyObject *self, PyObject *args)
149 {
150 PyObject *s, *result = NULL;
151 char allws, c;
152 const char *r;
153 Py_ssize_t i, rlen, wlen = 0;
154 char *w;
155
156 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
157 return NULL;
158 r = PyBytes_AsString(s);
159 rlen = PyBytes_Size(s);
160
161 w = (char *)PyMem_Malloc(rlen ? rlen : 1);
162 if (!w)
163 goto nomem;
164
165 for (i = 0; i != rlen; i++) {
166 c = r[i];
167 if (c == ' ' || c == '\t' || c == '\r') {
168 if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
169 w[wlen++] = ' ';
170 } else if (c == '\n' && !allws
171 && wlen > 0 && w[wlen - 1] == ' ') {
172 w[wlen - 1] = '\n';
173 } else {
174 w[wlen++] = c;
175 }
176 }
177
178 result = PyBytes_FromStringAndSize(w, wlen);
179
180 nomem:
181 PyMem_Free(w);
182 return result ? result : PyErr_NoMemory();
183 }
184
185
186 static char mdiff_doc[] = "Efficient binary diff.";
187
188 static PyMethodDef methods[] = {
189 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
190 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
191 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
192 {NULL, NULL}
193 };
194
195 static const int version = 1;
196
197 #ifdef IS_PY3K
198 static struct PyModuleDef bdiff_module = {
199 PyModuleDef_HEAD_INIT,
200 "bdiff",
201 mdiff_doc,
202 -1,
203 methods
204 };
205
206 PyMODINIT_FUNC PyInit_bdiff(void)
207 {
208 PyObject *m;
209 m = PyModule_Create(&bdiff_module);
210 PyModule_AddIntConstant(m, "version", version);
211 return m;
212 }
213 #else
214 PyMODINIT_FUNC initbdiff(void)
215 {
216 PyObject *m;
217 m = Py_InitModule3("bdiff", methods, mdiff_doc);
218 PyModule_AddIntConstant(m, "version", version);
219 }
220 #endif