comparison mercurial/cext/bdiff.c @ 36146:29dd37a418aa

bdiff: write a native version of splitnewlines

./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile before:
! wall 0.309280 comb 0.350000 user 0.290000 sys 0.060000 (best of 32)

./hg perfunidiff mercurial/manifest.py 0 --count 500 --profile after:
! wall 0.241572 comb 0.260000 user 0.240000 sys 0.020000 (best of 39)

so it's about 20% faster. I hate Python. I wish we could usefully write this in Rust, but it doesn't look like that's realistic without using the cpython crate, which I'd still like to avoid.

Differential Revision: https://phab.mercurial-scm.org/D1973
author Augie Fackler <augie@google.com>
date Thu, 25 Jan 2018 21:16:28 -0500
parents b4fdc6177b29
children 186c6df3a373
comparison
equal deleted inserted replaced
36145:4f3e989536c3 36146:29dd37a418aa
178 nomem: 178 nomem:
179 PyMem_Free(w); 179 PyMem_Free(w);
180 return result ? result : PyErr_NoMemory(); 180 return result ? result : PyErr_NoMemory();
181 } 181 }
182 182
183 static bool sliceintolist(PyObject *list, Py_ssize_t destidx,
184 const char *source, Py_ssize_t len)
185 {
186 PyObject *sliced = PyBytes_FromStringAndSize(source, len);
187 if (sliced == NULL)
188 return false;
189 PyList_SET_ITEM(list, destidx, sliced);
190 return true;
191 }
192
193 static PyObject *splitnewlines(PyObject *self, PyObject *args)
194 {
195 const char *text;
196 Py_ssize_t nelts = 0, size, i, start = 0;
197 PyObject *result = NULL;
198
199 if (!PyArg_ParseTuple(args, "s#", &text, &size)) {
200 goto abort;
201 }
202 if (!size) {
203 return PyList_New(0);
204 }
205 /* This loops to size-1 because if the last byte is a newline,
206 * we don't want to perform a split there. */
207 for (i = 0; i < size - 1; ++i) {
208 if (text[i] == '\n') {
209 ++nelts;
210 }
211 }
212 if ((result = PyList_New(nelts + 1)) == NULL)
213 goto abort;
214 nelts = 0;
215 for (i = 0; i < size - 1; ++i) {
216 if (text[i] == '\n') {
217 if (!sliceintolist(result, nelts++, text + start,
218 i - start + 1))
219 goto abort;
220 start = i + 1;
221 }
222 }
223 if (!sliceintolist(result, nelts++, text + start, size - start))
224 goto abort;
225 return result;
226 abort:
227 Py_XDECREF(result);
228 return NULL;
229 }
230
183 static char mdiff_doc[] = "Efficient binary diff."; 231 static char mdiff_doc[] = "Efficient binary diff.";
184 232
185 static PyMethodDef methods[] = { 233 static PyMethodDef methods[] = {
186 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, 234 {"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
187 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, 235 {"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
188 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, 236 {"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
237 {"splitnewlines", splitnewlines, METH_VARARGS,
238 "like str.splitlines, but only split on newlines\n"},
189 {NULL, NULL}, 239 {NULL, NULL},
190 }; 240 };
191 241
192 static const int version = 1; 242 static const int version = 2;
193 243
194 #ifdef IS_PY3K 244 #ifdef IS_PY3K
195 static struct PyModuleDef bdiff_module = { 245 static struct PyModuleDef bdiff_module = {
196 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods, 246 PyModuleDef_HEAD_INIT, "bdiff", mdiff_doc, -1, methods,
197 }; 247 };