Mercurial > hg
comparison mercurial/bdiff_module.c @ 29541:9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
author | Maciej Fijalkowski <fijall@gmail.com> |
---|---|
date | Wed, 13 Jul 2016 10:46:26 +0200 |
parents | |
children | 15635d8b17e0 |
comparison
equal
deleted
inserted
replaced
29540:4ce1fc91e30a | 29541:9631ff5ebbeb |
---|---|
1 /* | |
2 bdiff.c - efficient binary diff extension for Mercurial | |
3 | |
4 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> | |
5 | |
6 This software may be used and distributed according to the terms of | |
7 the GNU General Public License, incorporated herein by reference. | |
8 | |
9 Based roughly on Python difflib | |
10 */ | |
11 | |
12 #define PY_SSIZE_T_CLEAN | |
13 #include <Python.h> | |
14 #include <stdlib.h> | |
15 #include <string.h> | |
16 #include <limits.h> | |
17 | |
18 #include "bdiff.h" | |
19 #include "bitmanipulation.h" | |
20 | |
21 | |
22 static PyObject *blocks(PyObject *self, PyObject *args) | |
23 { | |
24 PyObject *sa, *sb, *rl = NULL, *m; | |
25 struct bdiff_line *a, *b; | |
26 struct bdiff_hunk l, *h; | |
27 int an, bn, count, pos = 0; | |
28 | |
29 l.next = NULL; | |
30 | |
31 if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb)) | |
32 return NULL; | |
33 | |
34 an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a); | |
35 bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b); | |
36 | |
37 if (!a || !b) | |
38 goto nomem; | |
39 | |
40 count = bdiff_diff(a, an, b, bn, &l); | |
41 if (count < 0) | |
42 goto nomem; | |
43 | |
44 rl = PyList_New(count); | |
45 if (!rl) | |
46 goto nomem; | |
47 | |
48 for (h = l.next; h; h = h->next) { | |
49 m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2); | |
50 PyList_SetItem(rl, pos, m); | |
51 pos++; | |
52 } | |
53 | |
54 nomem: | |
55 free(a); | |
56 free(b); | |
57 bdiff_freehunks(l.next); | |
58 return rl ? rl : PyErr_NoMemory(); | |
59 } | |
60 | |
61 static PyObject *bdiff(PyObject *self, PyObject *args) | |
62 { | |
63 char *sa, *sb, *rb; | |
64 PyObject *result = NULL; | |
65 struct bdiff_line *al, *bl; | |
66 struct bdiff_hunk l, *h; | |
67 int an, bn, count; | |
68 Py_ssize_t len = 0, la, lb; | |
69 PyThreadState *_save; | |
70 | |
71 l.next = NULL; | |
72 | |
73 if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb)) | |
74 return NULL; | |
75 | |
76 if (la > UINT_MAX || lb > UINT_MAX) { | |
77 PyErr_SetString(PyExc_ValueError, "bdiff inputs too large"); | |
78 return NULL; | |
79 } | |
80 | |
81 _save = PyEval_SaveThread(); | |
82 an = bdiff_splitlines(sa, la, &al); | |
83 bn = bdiff_splitlines(sb, lb, &bl); | |
84 if (!al || !bl) | |
85 goto nomem; | |
86 | |
87 count = bdiff_diff(al, an, bl, bn, &l); | |
88 if (count < 0) | |
89 goto nomem; | |
90 | |
91 /* calculate length of output */ | |
92 la = lb = 0; | |
93 for (h = l.next; h; h = h->next) { | |
94 if (h->a1 != la || h->b1 != lb) | |
95 len += 12 + bl[h->b1].l - bl[lb].l; | |
96 la = h->a2; | |
97 lb = h->b2; | |
98 } | |
99 PyEval_RestoreThread(_save); | |
100 _save = NULL; | |
101 | |
102 result = PyBytes_FromStringAndSize(NULL, len); | |
103 | |
104 if (!result) | |
105 goto nomem; | |
106 | |
107 /* build binary patch */ | |
108 rb = PyBytes_AsString(result); | |
109 la = lb = 0; | |
110 | |
111 for (h = l.next; h; h = h->next) { | |
112 if (h->a1 != la || h->b1 != lb) { | |
113 len = bl[h->b1].l - bl[lb].l; | |
114 putbe32((uint32_t)(al[la].l - al->l), rb); | |
115 putbe32((uint32_t)(al[h->a1].l - al->l), rb + 4); | |
116 putbe32((uint32_t)len, rb + 8); | |
117 memcpy(rb + 12, bl[lb].l, len); | |
118 rb += 12 + len; | |
119 } | |
120 la = h->a2; | |
121 lb = h->b2; | |
122 } | |
123 | |
124 nomem: | |
125 if (_save) | |
126 PyEval_RestoreThread(_save); | |
127 free(al); | |
128 free(bl); | |
129 bdiff_freehunks(l.next); | |
130 return result ? result : PyErr_NoMemory(); | |
131 } | |
132 | |
133 /* | |
134 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise, | |
135 * reduce whitespace sequences to a single space and trim remaining whitespace | |
136 * from end of lines. | |
137 */ | |
138 static PyObject *fixws(PyObject *self, PyObject *args) | |
139 { | |
140 PyObject *s, *result = NULL; | |
141 char allws, c; | |
142 const char *r; | |
143 Py_ssize_t i, rlen, wlen = 0; | |
144 char *w; | |
145 | |
146 if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws)) | |
147 return NULL; | |
148 r = PyBytes_AsString(s); | |
149 rlen = PyBytes_Size(s); | |
150 | |
151 w = (char *)malloc(rlen ? rlen : 1); | |
152 if (!w) | |
153 goto nomem; | |
154 | |
155 for (i = 0; i != rlen; i++) { | |
156 c = r[i]; | |
157 if (c == ' ' || c == '\t' || c == '\r') { | |
158 if (!allws && (wlen == 0 || w[wlen - 1] != ' ')) | |
159 w[wlen++] = ' '; | |
160 } else if (c == '\n' && !allws | |
161 && wlen > 0 && w[wlen - 1] == ' ') { | |
162 w[wlen - 1] = '\n'; | |
163 } else { | |
164 w[wlen++] = c; | |
165 } | |
166 } | |
167 | |
168 result = PyBytes_FromStringAndSize(w, wlen); | |
169 | |
170 nomem: | |
171 free(w); | |
172 return result ? result : PyErr_NoMemory(); | |
173 } | |
174 | |
175 | |
176 static char mdiff_doc[] = "Efficient binary diff."; | |
177 | |
178 static PyMethodDef methods[] = { | |
179 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, | |
180 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, | |
181 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, | |
182 {NULL, NULL} | |
183 }; | |
184 | |
185 #ifdef IS_PY3K | |
186 static struct PyModuleDef bdiff_module = { | |
187 PyModuleDef_HEAD_INIT, | |
188 "bdiff", | |
189 mdiff_doc, | |
190 -1, | |
191 methods | |
192 }; | |
193 | |
194 PyMODINIT_FUNC PyInit_bdiff(void) | |
195 { | |
196 return PyModule_Create(&bdiff_module); | |
197 } | |
198 #else | |
199 PyMODINIT_FUNC initbdiff(void) | |
200 { | |
201 Py_InitModule3("bdiff", methods, mdiff_doc); | |
202 } | |
203 #endif |