mercurial/bdiff_module.c
author Gregory Szorc <gregory.szorc@gmail.com>
Sun, 06 Nov 2016 09:51:14 -0800
changeset 30335 7d91a085ebe6
parent 30170 15635d8b17e0
child 30561 7c0c722d568d
permissions -rw-r--r--
perf: prepare to handle multiple pairs in perfbdiff Before, we only supported benchmarking a single pair of texts with bdiff. We want to enable feeding larger corpora into this benchmark. So rewrite the code to support that.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     1
/*
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     2
 bdiff.c - efficient binary diff extension for Mercurial
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     3
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     4
 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     5
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     8
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
     9
 Based roughly on Python difflib
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    10
*/
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    11
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    12
#define PY_SSIZE_T_CLEAN
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    13
#include <Python.h>
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    14
#include <stdlib.h>
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    15
#include <string.h>
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    16
#include <limits.h>
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    17
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    18
#include "bdiff.h"
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    19
#include "bitmanipulation.h"
30170
15635d8b17e0 bdiff: include util.h
Gregory Szorc <gregory.szorc@gmail.com>
parents: 29541
diff changeset
    20
#include "util.h"
29541
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    21
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    22
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    23
static PyObject *blocks(PyObject *self, PyObject *args)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    24
{
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    25
	PyObject *sa, *sb, *rl = NULL, *m;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    26
	struct bdiff_line *a, *b;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    27
	struct bdiff_hunk l, *h;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    28
	int an, bn, count, pos = 0;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    29
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    30
	l.next = NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    31
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    32
	if (!PyArg_ParseTuple(args, "SS:bdiff", &sa, &sb))
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    33
		return NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    34
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    35
	an = bdiff_splitlines(PyBytes_AsString(sa), PyBytes_Size(sa), &a);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    36
	bn = bdiff_splitlines(PyBytes_AsString(sb), PyBytes_Size(sb), &b);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    37
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    38
	if (!a || !b)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    39
		goto nomem;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    40
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    41
	count = bdiff_diff(a, an, b, bn, &l);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    42
	if (count < 0)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    43
		goto nomem;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    44
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    45
	rl = PyList_New(count);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    46
	if (!rl)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    47
		goto nomem;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    48
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    49
	for (h = l.next; h; h = h->next) {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    50
		m = Py_BuildValue("iiii", h->a1, h->a2, h->b1, h->b2);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    51
		PyList_SetItem(rl, pos, m);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    52
		pos++;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    53
	}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    54
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    55
nomem:
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    56
	free(a);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    57
	free(b);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    58
	bdiff_freehunks(l.next);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    59
	return rl ? rl : PyErr_NoMemory();
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    60
}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    61
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    62
static PyObject *bdiff(PyObject *self, PyObject *args)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    63
{
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    64
	char *sa, *sb, *rb;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    65
	PyObject *result = NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    66
	struct bdiff_line *al, *bl;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    67
	struct bdiff_hunk l, *h;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    68
	int an, bn, count;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    69
	Py_ssize_t len = 0, la, lb;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    70
	PyThreadState *_save;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    71
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    72
	l.next = NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    73
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    74
	if (!PyArg_ParseTuple(args, "s#s#:bdiff", &sa, &la, &sb, &lb))
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    75
		return NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    76
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    77
	if (la > UINT_MAX || lb > UINT_MAX) {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    78
		PyErr_SetString(PyExc_ValueError, "bdiff inputs too large");
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    79
		return NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    80
	}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    81
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    82
	_save = PyEval_SaveThread();
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    83
	an = bdiff_splitlines(sa, la, &al);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    84
	bn = bdiff_splitlines(sb, lb, &bl);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    85
	if (!al || !bl)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    86
		goto nomem;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    87
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    88
	count = bdiff_diff(al, an, bl, bn, &l);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    89
	if (count < 0)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    90
		goto nomem;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    91
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    92
	/* calculate length of output */
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    93
	la = lb = 0;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    94
	for (h = l.next; h; h = h->next) {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    95
		if (h->a1 != la || h->b1 != lb)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    96
			len += 12 + bl[h->b1].l - bl[lb].l;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    97
		la = h->a2;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    98
		lb = h->b2;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
    99
	}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   100
	PyEval_RestoreThread(_save);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   101
	_save = NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   102
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   103
	result = PyBytes_FromStringAndSize(NULL, len);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   104
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   105
	if (!result)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   106
		goto nomem;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   107
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   108
	/* build binary patch */
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   109
	rb = PyBytes_AsString(result);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   110
	la = lb = 0;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   111
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   112
	for (h = l.next; h; h = h->next) {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   113
		if (h->a1 != la || h->b1 != lb) {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   114
			len = bl[h->b1].l - bl[lb].l;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   115
			putbe32((uint32_t)(al[la].l - al->l), rb);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   116
			putbe32((uint32_t)(al[h->a1].l - al->l), rb + 4);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   117
			putbe32((uint32_t)len, rb + 8);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   118
			memcpy(rb + 12, bl[lb].l, len);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   119
			rb += 12 + len;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   120
		}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   121
		la = h->a2;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   122
		lb = h->b2;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   123
	}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   124
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   125
nomem:
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   126
	if (_save)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   127
		PyEval_RestoreThread(_save);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   128
	free(al);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   129
	free(bl);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   130
	bdiff_freehunks(l.next);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   131
	return result ? result : PyErr_NoMemory();
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   132
}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   133
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   134
/*
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   135
 * If allws != 0, remove all whitespace (' ', \t and \r). Otherwise,
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   136
 * reduce whitespace sequences to a single space and trim remaining whitespace
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   137
 * from end of lines.
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   138
 */
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   139
static PyObject *fixws(PyObject *self, PyObject *args)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   140
{
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   141
	PyObject *s, *result = NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   142
	char allws, c;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   143
	const char *r;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   144
	Py_ssize_t i, rlen, wlen = 0;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   145
	char *w;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   146
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   147
	if (!PyArg_ParseTuple(args, "Sb:fixws", &s, &allws))
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   148
		return NULL;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   149
	r = PyBytes_AsString(s);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   150
	rlen = PyBytes_Size(s);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   151
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   152
	w = (char *)malloc(rlen ? rlen : 1);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   153
	if (!w)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   154
		goto nomem;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   155
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   156
	for (i = 0; i != rlen; i++) {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   157
		c = r[i];
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   158
		if (c == ' ' || c == '\t' || c == '\r') {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   159
			if (!allws && (wlen == 0 || w[wlen - 1] != ' '))
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   160
				w[wlen++] = ' ';
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   161
		} else if (c == '\n' && !allws
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   162
			  && wlen > 0 && w[wlen - 1] == ' ') {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   163
			w[wlen - 1] = '\n';
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   164
		} else {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   165
			w[wlen++] = c;
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   166
		}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   167
	}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   168
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   169
	result = PyBytes_FromStringAndSize(w, wlen);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   170
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   171
nomem:
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   172
	free(w);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   173
	return result ? result : PyErr_NoMemory();
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   174
}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   175
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   176
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   177
static char mdiff_doc[] = "Efficient binary diff.";
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   178
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   179
static PyMethodDef methods[] = {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   180
	{"bdiff", bdiff, METH_VARARGS, "calculate a binary diff\n"},
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   181
	{"blocks", blocks, METH_VARARGS, "find a list of matching lines\n"},
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   182
	{"fixws", fixws, METH_VARARGS, "normalize diff whitespaces\n"},
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   183
	{NULL, NULL}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   184
};
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   185
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   186
#ifdef IS_PY3K
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   187
static struct PyModuleDef bdiff_module = {
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   188
	PyModuleDef_HEAD_INIT,
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   189
	"bdiff",
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   190
	mdiff_doc,
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   191
	-1,
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   192
	methods
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   193
};
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   194
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   195
PyMODINIT_FUNC PyInit_bdiff(void)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   196
{
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   197
	return PyModule_Create(&bdiff_module);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   198
}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   199
#else
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   200
PyMODINIT_FUNC initbdiff(void)
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   201
{
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   202
	Py_InitModule3("bdiff", methods, mdiff_doc);
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   203
}
9631ff5ebbeb bdiff: split bdiff into cpy-aware and cpy-agnostic part
Maciej Fijalkowski <fijall@gmail.com>
parents:
diff changeset
   204
#endif