mercurial/mpatch_module.c
author Jun Wu <quark@fb.com>
Sat, 22 Apr 2017 16:50:08 -0700
changeset 32112 31763785094b
parent 29749 155f0cc3f813
child 32358 5fc3459d0493
permissions -rw-r--r--
worker: rewrite error handling so os._exit covers all cases Previously the worker error handling looked like: pid = os.fork() --+ if pid == 0: | .... | problematic .... --+ try: --+ .... | worker error handling --+ If a signal arrives when Python is executing the "problematic" lines, an external error handler (dispatch.py) will take over the control flow and it's no longer guaranteed "os._exit" is called (see 86cd09bc13ba for why it is necessary). This patch rewrites the error handling so it covers all possible code paths for a worker even during fork. Note: "os.getpid() == parentpid" is used to test if the process is parent or not intentionally, instead of checking "pid", because "pid = os.fork()" may not be atomic - it's possible that a signal hits the worker before the assignment completes [1]. The newly added test replaces "os.fork" to exercise that extreme case. [1]: CPython compiles "pid = os.fork()" to 2 byte codes: "CALL_FUNCTION" and "STORE_FAST", so it's probably not atomic: def f(): pid = os.fork() dis.dis(f) 2 0 LOAD_GLOBAL 0 (os) 3 LOAD_ATTR 1 (fork) 6 CALL_FUNCTION 0 9 STORE_FAST 0 (pid) 12 LOAD_CONST 0 (None) 15 RETURN_VALUE

/*
 mpatch.c - efficient binary patching for Mercurial

 This implements a patch algorithm that's O(m + n log n) where m is the
 size of the output and n is the number of patches.

 Given a list of binary patches, it unpacks each into a hunk list,
 then combines the hunk lists with a treewise recursion to form a
 single hunk list. This hunk list is then applied to the original
 text.

 The text (or binary) fragments are copied directly from their source
 Python objects into a preallocated output string to avoid the
 allocation of intermediate Python objects. Working memory is about 2x
 the total number of hunks.

 Copyright 2005, 2006 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
*/

#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"
#include "bitmanipulation.h"
#include "compat.h"
#include "mpatch.h"

/* Module docstring, handed to the interpreter by the module init code. */
static char mpatch_doc[] = "Efficient binary patching.";
/*
 * Exception type raised for undecodable or invalid patches. It stays NULL
 * until one of the module init functions in this file creates it.
 */
static PyObject *mpatch_Error;

static void setpyerr(int r)
{
	switch (r) {
	case MPATCH_ERR_NO_MEM:
		PyErr_NoMemory();
		break;
	case MPATCH_ERR_CANNOT_BE_DECODED:
		PyErr_SetString(mpatch_Error, "patch cannot be decoded");
		break;
	case MPATCH_ERR_INVALID_PATCH:
		PyErr_SetString(mpatch_Error, "invalid patch");
		break;
	}
}

struct mpatch_flist *cpygetitem(void *bins, ssize_t pos)
{
	const char *buffer;
	struct mpatch_flist *res;
	ssize_t blen;
	int r;

	PyObject *tmp = PyList_GetItem((PyObject*)bins, pos);
	if (!tmp)
		return NULL;
	if (PyObject_AsCharBuffer(tmp, &buffer, (Py_ssize_t*)&blen))
		return NULL;
	if ((r = mpatch_decode(buffer, blen, &res)) < 0) {
		if (!PyErr_Occurred())
			setpyerr(r);
		return NULL;
	}
	return res;
}

/*
 * patches(text, bins) -> bytes
 *
 * Apply the list of binary patches `bins` to `text` and return the result.
 *
 * `text` must expose a character buffer and `bins` must be a list. When
 * `bins` is empty, `text` itself is returned (with a new reference).
 * Otherwise the patches are folded into one hunk list with mpatch_fold(),
 * the output size is computed, and the hunks are applied into a freshly
 * allocated bytes object.
 *
 * Returns NULL with a Python exception set on failure.
 */
static PyObject *
patches(PyObject *self, PyObject *args)
{
	PyObject *text, *bins, *result = NULL;
	struct mpatch_flist *patch;
	const char *in;
	int r = 0;
	char *out;
	Py_ssize_t len, outlen, inlen;

	if (!PyArg_ParseTuple(args, "OO:mpatch", &text, &bins))
		return NULL;

	len = PyList_Size(bins);
	if (len < 0) {
		/*
		 * bins is not a list: PyList_Size() has already set the
		 * exception. Bail out instead of folding over a negative
		 * range with an error pending.
		 */
		return NULL;
	}
	if (!len) {
		/* nothing to do */
		Py_INCREF(text);
		return text;
	}

	if (PyObject_AsCharBuffer(text, &in, &inlen))
		return NULL;

	patch = mpatch_fold(bins, cpygetitem, 0, len);
	if (!patch) { /* error already set or memory error */
		if (!PyErr_Occurred())
			PyErr_NoMemory();
		return NULL;
	}

	outlen = mpatch_calcsize(inlen, patch);
	if (outlen < 0) {
		/* a negative size is an mpatch status code */
		r = (int)outlen;
		goto cleanup;
	}

	result = PyBytes_FromStringAndSize(NULL, outlen);
	if (!result)
		goto cleanup; /* allocation failure, exception already set */

	out = PyBytes_AsString(result);
	r = mpatch_apply(out, in, inlen, patch);
	if (r < 0) {
		Py_DECREF(result);
		result = NULL;
	}
cleanup:
	mpatch_lfree(patch);
	/* translate the mpatch status only if nothing raised already */
	if (!result && !PyErr_Occurred())
		setpyerr(r);
	return result;
}

/*
 * patchedsize(orig, patch) -> int
 *
 * Calculate the size of a patched file directly from a single binary
 * patch, without applying it.
 *
 * `orig` is the size of the original text. `patch` is a sequence of hunks,
 * each made of a 12-byte header (start, end and len, read with getbe32())
 * followed by `len` bytes of replacement data. Every hunk contributes the
 * untouched bytes between the previous hunk's end and its own start, plus
 * its replacement length; the tail after the last hunk is added at the end.
 *
 * Raises mpatch_Error ("patch cannot be decoded") when the hunks do not
 * consume the patch exactly, including when the patch ends in the middle
 * of a hunk header.
 */
static PyObject *
patchedsize(PyObject *self, PyObject *args)
{
	enum { HUNK_HEADER_LEN = 12 };
	long orig, start, end, len, outlen = 0, last = 0, pos = 0;
	Py_ssize_t patchlen;
	char *bin;

	if (!PyArg_ParseTuple(args, "ls#", &orig, &bin, &patchlen))
		return NULL;

	while (pos >= 0 && pos < patchlen) {
		/*
		 * Never read a header that extends past the end of the
		 * buffer; leaving pos short of patchlen makes the check
		 * below report the patch as undecodable.
		 */
		if (patchlen - pos < HUNK_HEADER_LEN)
			break;
		start = getbe32(bin + pos);
		end = getbe32(bin + pos + 4);
		len = getbe32(bin + pos + 8);
		if (start > end)
			break; /* sanity check */
		pos += HUNK_HEADER_LEN + len;
		outlen += start - last;
		last = end;
		outlen += len;
	}

	/* a well-formed patch is consumed exactly */
	if (pos != patchlen) {
		if (!PyErr_Occurred())
			PyErr_SetString(mpatch_Error, "patch cannot be decoded");
		return NULL;
	}

	outlen += orig - last;
	return Py_BuildValue("l", outlen);
}

/*
 * Functions exported by the module. Each entry is
 * {name, C function, calling convention, docstring}; the all-zero entry
 * terminates the table.
 */
static PyMethodDef methods[] = {
	{
		"patches",
		patches,
		METH_VARARGS,
		"apply a series of patches\n"
	},
	{
		"patchedsize",
		patchedsize,
		METH_VARARGS,
		"calculed patched size\n"
	},
	{NULL, NULL, 0, NULL}
};

#ifdef IS_PY3K
/* Python 3 module definition passed to PyModule_Create() in PyInit_mpatch(). */
static struct PyModuleDef mpatch_module = {
	PyModuleDef_HEAD_INIT,
	"mpatch", /* m_name */
	mpatch_doc, /* m_doc */
	-1, /* m_size: no per-module state; mpatch_Error lives in a C global */
	methods /* m_methods */
};

/*
 * Python 3 module initializer.
 *
 * Creates the module, creates the mpatch_Error exception type and
 * publishes it on the module as "mpatchError".
 *
 * Returns the new module, or NULL with an exception set if any step fails.
 */
PyMODINIT_FUNC PyInit_mpatch(void)
{
	PyObject *m;

	m = PyModule_Create(&mpatch_module);
	if (m == NULL)
		return NULL;

	mpatch_Error = PyErr_NewException("mercurial.mpatch.mpatchError",
					  NULL, NULL);
	if (mpatch_Error == NULL) {
		Py_DECREF(m);
		return NULL;
	}

	/*
	 * PyModule_AddObject() steals a reference on success only; the
	 * extra INCREF keeps the global pointer valid independently of
	 * the module's own reference.
	 */
	Py_INCREF(mpatch_Error);
	if (PyModule_AddObject(m, "mpatchError", mpatch_Error) < 0) {
		Py_DECREF(mpatch_Error); /* the reference that was not stolen */
		Py_CLEAR(mpatch_Error);  /* the global's own reference */
		Py_DECREF(m);
		return NULL;
	}

	return m;
}
#else
PyMODINIT_FUNC
initmpatch(void)
{
	Py_InitModule3("mpatch", methods, mpatch_doc);
	mpatch_Error = PyErr_NewException("mercurial.mpatch.mpatchError",
					  NULL, NULL);
}
#endif