author | Yuya Nishihara <yuya@tcha.org> |
Sat, 13 Aug 2016 12:12:50 +0900 | |
changeset 32369 | 3b88a7fa97d8 |
parent 32355 | mercurial/bdiff_module.c@4195b84940e9 |
child 34438 | b90e8da190da |
permissions | -rw-r--r-- |
29541
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
1 |
/* |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
2 |
bdiff.c - efficient binary diff extension for Mercurial |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
3 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
4 |
Copyright 2005, 2006 Matt Mackall <mpm@selenic.com> |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
5 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
6 |
This software may be used and distributed according to the terms of |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
7 |
the GNU General Public License, incorporated herein by reference. |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
8 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
9 |
Based roughly on Python difflib |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
10 |
*/ |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
11 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
12 |
#define PY_SSIZE_T_CLEAN |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
13 |
#include <Python.h> |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
14 |
#include <stdlib.h> |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
15 |
#include <string.h> |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
16 |
#include <limits.h> |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
17 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
18 |
#include "bdiff.h" |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
19 |
#include "bitmanipulation.h" |
30170
15635d8b17e0
bdiff: include util.h
Gregory Szorc <gregory.szorc@gmail.com>
parents:
29541
diff
changeset
|
20 |
#include "util.h" |
29541
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
21 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
22 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
23 |
/*
 * blocks(a, b) -> list of 4-tuples
 *
 * Python entry point.  Takes two bytes objects, splits each into lines
 * with bdiff_splitlines(), runs bdiff_diff() over the two line arrays and
 * returns a list with one (a1, a2, b1, b2) tuple per hunk in the resulting
 * hunk chain.
 *
 * Returns a new reference to the list, or NULL with an exception set
 * (TypeError from argument parsing, MemoryError for every other failure).
 */
static PyObject *blocks(PyObject *self, PyObject *args)
{
	PyObject *sa, *sb, *rl = NULL, *m;
	/* start as NULL so the cleanup path never frees an unset pointer */
	struct bdiff_line *a = NULL, *b = NULL;
	struct bdiff_hunk l, *h;
	int an, bn, count, pos = 0;

	/* l is only the list head; bdiff_diff() hangs the hunks off l.next */
	l.next = NULL;

	/* name after ':' is what appears in argument error messages */
	if (!PyArg_ParseTuple(args, "SS:blocks", &sa, &sb))
		return NULL;

	an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
	bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);

	if (!a || !b)
		goto nomem;

	count = bdiff_diff(a, an, b, bn, &l);
	if (count < 0)
		goto nomem;

	rl = PyList_New(count);
	if (!rl)
		goto nomem;

	for (h = l.next; h; h = h->next) {
		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
		if (!m) {
			/*
			 * Do not hand back a partially filled list while an
			 * exception is pending; drop it and report failure.
			 */
			Py_DECREF(rl);
			rl = NULL;
			goto nomem;
		}
		/* PyList_SetItem steals the reference to m */
		PyList_SetItem(rl, pos, m);
		pos++;
	}

nomem:
	/* shared exit: reached on success as well as on every failure */
	free(a);
	free(b);
	bdiff_freehunks(l.next);
	return rl ? rl : PyErr_NoMemory();
}
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
61 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
62 |
/*
 * bdiff(a, b) -> bytes
 *
 * Python entry point.  Computes a binary patch that describes how to turn
 * buffer a into buffer b.
 *
 * The patch is a sequence of records, one per differing region.  Each
 * record is three 32-bit values written with putbe32() - start offset in
 * a, end offset in a, and the length of the replacement data - followed by
 * that many bytes copied from b.
 *
 * The leading run of complete lines shared by both inputs is skipped
 * before splitting and diffing, and the offsets written for a are shifted
 * by the skipped length.  The GIL is released while the diff is computed
 * and re-acquired before any Python object is created.
 *
 * Returns a new bytes object, or NULL with an exception set (ValueError
 * when an input is longer than UINT_MAX, MemoryError otherwise).
 */
static PyObject *bdiff(PyObject *self, PyObject *args)
{
	char *sa, *sb;
	char *out, *pa, *pb;
	PyObject *result = NULL;
	struct bdiff_line *al, *bl;
	struct bdiff_hunk head, *hunk;
	int an, bn, count;
	Py_ssize_t la, lb;
	Py_ssize_t total = 0, scanned = 0, lcommon = 0, limit, chunk;
	PyThreadState *saved;

	head.next = NULL;

	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb)) {
		return NULL;
	}

	if (la > UINT_MAX || lb > UINT_MAX) {
		PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
		return NULL;
	}

	/* no Python API calls are allowed until the thread is restored */
	saved = PyEval_SaveThread();

	/*
	 * Walk the common prefix, remembering the position just past the
	 * last newline seen: only whole shared lines may be skipped.
	 */
	limit = la < lb ? la : lb;
	pa = sa;
	pb = sb;
	while (scanned < limit && *pa == *pb) {
		if (*pa == '\n') {
			lcommon = scanned + 1;
		}
		++scanned;
		++pa;
		++pb;
	}
	/* we can almost add: if (scanned == limit) lcommon = scanned; */

	an = bdiff_splitlines(sa + lcommon, la - lcommon, &al);
	bn = bdiff_splitlines(sb + lcommon, lb - lcommon, &bl);
	if (!al || !bl) {
		goto nomem;
	}

	count = bdiff_diff(al, an, bl, bn, &head);
	if (count < 0) {
		goto nomem;
	}

	/* first pass: add up the size of the patch (la/lb now index lines) */
	la = 0;
	lb = 0;
	for (hunk = head.next; hunk; hunk = hunk->next) {
		if (hunk->a1 != la || hunk->b1 != lb) {
			total += 12 + (bl[hunk->b1].l - bl[lb].l);
		}
		la = hunk->a2;
		lb = hunk->b2;
	}

	PyEval_RestoreThread(saved);
	saved = NULL; /* tells the cleanup code the GIL is already held */

	result = PyBytes_FromStringAndSize(NULL, total);
	if (!result) {
		goto nomem;
	}

	/* second pass: emit one header + payload per differing region */
	out = PyBytes_AsString(result);
	la = 0;
	lb = 0;
	for (hunk = head.next; hunk; hunk = hunk->next) {
		if (hunk->a1 != la || hunk->b1 != lb) {
			chunk = bl[hunk->b1].l - bl[lb].l;
			putbe32((uint32_t)(lcommon + (al[la].l - al->l)), out);
			putbe32((uint32_t)(lcommon + (al[hunk->a1].l - al->l)),
				out + 4);
			putbe32((uint32_t)chunk, out + 8);
			memcpy(out + 12, bl[lb].l, chunk);
			out += 12 + chunk;
		}
		la = hunk->a2;
		lb = hunk->b2;
	}

nomem:
	/* shared exit: reached on success as well as on failure */
	if (saved) {
		PyEval_RestoreThread(saved);
	}
	free(al);
	free(bl);
	bdiff_freehunks(head.next);
	if (!result) {
		return PyErr_NoMemory();
	}
	return result;
}
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
142 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
143 |
/* |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
144 |
* If allws != 0, remove all whitespace (' ', \t and \r). Otherwise, |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
145 |
* reduce whitespace sequences to a single space and trim remaining whitespace |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
146 |
* from end of lines. |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
147 |
*/ |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
148 |
static PyObject *fixws(PyObject *self, PyObject *args) |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
149 |
{ |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
150 |
PyObject *s, *result = NULL; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
151 |
char allws, c; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
152 |
const char *r; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
153 |
Py_ssize_t i, rlen, wlen = 0; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
154 |
char *w; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
155 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
156 |
if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws)) |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
157 |
return NULL; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
158 |
r = PyBytes_AsString(s); |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
159 |
rlen = PyBytes_Size(s); |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
160 |
|
31467
08ecec297521
bdiff: use Python memory allocator in fixws
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30561
diff
changeset
|
161 |
w = (char *)PyMem_Malloc(rlen ? rlen : 1); |
29541
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
162 |
if (!w) |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
163 |
goto nomem; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
164 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
165 |
for (i = 0; i != rlen; i++) { |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
166 |
c = r[i]; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
167 |
if (c == ' ' || c == '\t' || c == '\r') { |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
168 |
if (!allws && (wlen == 0 || w[wlen - 1] != ' ')) |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
169 |
w[wlen++] = ' '; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
170 |
} else if (c == '\n' && !allws |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
171 |
&& wlen > 0 && w[wlen - 1] == ' ') { |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
172 |
w[wlen - 1] = '\n'; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
173 |
} else { |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
174 |
w[wlen++] = c; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
175 |
} |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
176 |
} |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
177 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
178 |
result = PyBytes_FromStringAndSize(w, wlen); |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
179 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
180 |
nomem: |
31467
08ecec297521
bdiff: use Python memory allocator in fixws
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30561
diff
changeset
|
181 |
PyMem_Free(w); |
29541
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
182 |
return result ? result : PyErr_NoMemory(); |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
183 |
} |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
184 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
185 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
186 |
static char mdiff_doc[] = "Efficient binary diff."; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
187 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
188 |
static PyMethodDef methods[] = { |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
189 |
{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"}, |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
190 |
{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"}, |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
191 |
{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"}, |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
192 |
{NULL, NULL} |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
193 |
}; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
194 |
|
32355
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
195 |
static const int version = 1; |
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
196 |
|
29541
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
197 |
#ifdef IS_PY3K |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
198 |
static struct PyModuleDef bdiff_module = { |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
199 |
PyModuleDef_HEAD_INIT, |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
200 |
"bdiff", |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
201 |
mdiff_doc, |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
202 |
-1, |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
203 |
methods |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
204 |
}; |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
205 |
|
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
206 |
PyMODINIT_FUNC PyInit_bdiff(void) |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
207 |
{ |
32355
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
208 |
PyObject *m; |
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
209 |
m = PyModule_Create(&bdiff_module); |
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
210 |
PyModule_AddIntConstant(m, "version", version); |
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
211 |
return m; |
29541
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
212 |
} |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
213 |
#else |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
214 |
PyMODINIT_FUNC initbdiff(void) |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
215 |
{ |
32355
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
216 |
PyObject *m; |
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
217 |
m = Py_InitModule3("bdiff", methods, mdiff_doc); |
4195b84940e9
bdiff: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
31467
diff
changeset
|
218 |
PyModule_AddIntConstant(m, "version", version); |
29541
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
219 |
} |
9631ff5ebbeb
bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff
changeset
|
220 |
#endif |