mercurial/parsers.c
author Martin Geisler <mg@daimi.au.dk>
Sat, 09 Aug 2008 02:10:22 +0200
changeset 6871 13fe85fe396b
parent 6395 3f0294536b24
child 7091 12b35ae03365
permissions -rw-r--r--
mdiff: compare content of binary files directly. A plain Python string comparison stops when the first mismatch is found, whereas the call to md5 would need to compute the hash over the entire string and only then do the comparison.

/*
 parsers.c - efficient content parsing

 Copyright 2008 Matt Mackall <mpm@selenic.com> and others

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.
*/

#include <Python.h>
#include <ctype.h>
#include <string.h>

/*
 * Map one ASCII hex character ('0'-'9', 'a'-'f', 'A'-'F') to its
 * numeric value 0..15.  Any other character yields -1.
 */
static int hexdigit(char c)
{
	int value = -1;

	if ('0' <= c && c <= '9') {
		value = c - '0';
	} else if ('a' <= c && c <= 'f') {
		value = 10 + (c - 'a');
	} else if ('A' <= c && c <= 'F') {
		value = 10 + (c - 'A');
	}

	return value;
}

/*
 * Decode the `len` hex characters starting at `str` into a newly
 * created Python string holding len / 2 raw bytes.
 *
 * Returns a new reference on success.  Returns NULL with ValueError
 * set when `len` is odd or a non-hex character is encountered, and
 * NULL with the allocator's error set when the result string cannot
 * be created.
 */
static PyObject *unhexlify(const char *str, int len)
{
	const char *src, *end;
	PyObject *result;
	char *dst;

	if (len % 2 != 0) {
		PyErr_SetString(PyExc_ValueError,
				"input is not even in length");
		return NULL;
	}

	result = PyString_FromStringAndSize(NULL, len / 2);
	if (result == NULL)
		return NULL;

	/* Writable view of the freshly allocated (uninitialised) buffer. */
	dst = PyString_AsString(result);
	if (dst == NULL) {
		Py_DECREF(result);
		return NULL;
	}

	/* Consume the input two characters (one output byte) at a time. */
	end = str + len;
	for (src = str; src < end; src += 2) {
		int hi = hexdigit(src[0]);
		int lo = hexdigit(src[1]);

		if (hi == -1 || lo == -1) {
			PyErr_SetString(PyExc_ValueError,
					"input contains non-hex character");
			Py_DECREF(result);
			return NULL;
		}

		*dst++ = (hi << 4) | lo;
	}

	return result;
}

/*
 * parse_manifest(mfdict, fdict, data) -> None
 *
 * Parse raw manifest text and fill two caller-supplied dicts.
 *
 * Each manifest entry is one line terminated by a newline ('\n') and
 * has the form
 *
 *     <file name> NUL <hex node> [<flags>] '\n'
 *
 * The last NUL byte on the line separates the file name from the rest.
 * Up to the first 40 characters after the separator are hex-decoded
 * and stored as mfdict[file]; any characters beyond those 40 are
 * stored verbatim as fdict[file].
 *
 * Raises ValueError when a line has no NUL separator, when the node
 * is not valid hex, or when the data does not end with a newline.
 * Entries parsed before an error remain in the dicts.
 *
 * This code assumes that a manifest is stitched together with newline
 * ('\n') characters.
 */
static PyObject *parse_manifest(PyObject *self, PyObject *args)
{
	PyObject *mfdict, *fdict;
	char *str, *cur, *start, *zero;
	int len;

	if (!PyArg_ParseTuple(args, "O!O!s#:parse_manifest",
			      &PyDict_Type, &mfdict,
			      &PyDict_Type, &fdict,
			      &str, &len))
		goto quit;

	for (start = cur = str, zero = NULL; cur < str + len; cur++) {
		PyObject *file = NULL, *node = NULL;
		PyObject *flags = NULL;
		int nlen;

		/* Remember the most recent NUL; skip until end of line. */
		if (!*cur) {
			zero = cur;
			continue;
		}
		else if (*cur != '\n')
			continue;

		if (!zero) {
			PyErr_SetString(PyExc_ValueError,
					"manifest entry has no separator");
			goto quit;
		}

		file = PyString_FromStringAndSize(start, zero - start);
		if (!file)
			goto bail;

		/* Number of characters between the NUL and the newline. */
		nlen = cur - zero - 1;

		/* The node is at most 40 hex characters (20 bytes). */
		node = unhexlify(zero + 1, nlen > 40 ? 40 : nlen);
		if (!node)
			goto bail;

		if (nlen > 40) {
			/*
			 * Assign to the loop-scoped `flags` (do not declare
			 * a new one here): PyDict_SetItem() does not steal
			 * the reference, so ours must be dropped by the
			 * Py_XDECREF(flags) calls below.
			 */
			flags = PyString_FromStringAndSize(zero + 41,
							   nlen - 40);
			if (!flags)
				goto bail;

			if (PyDict_SetItem(fdict, file, flags) == -1)
				goto bail;
		}

		if (PyDict_SetItem(mfdict, file, node) == -1)
			goto bail;

		/* Next entry starts right after this newline. */
		start = cur + 1;
		zero = NULL;

		/* The dicts hold their own references; release ours. */
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		continue;
	bail:
		Py_XDECREF(flags);
		Py_XDECREF(node);
		Py_XDECREF(file);
		goto quit;
	}

	/* Anything after the final newline is an incomplete entry. */
	if (len > 0 && *(cur - 1) != '\n') {
		PyErr_SetString(PyExc_ValueError,
				"manifest contains trailing garbage");
		goto quit;
	}

	Py_INCREF(Py_None);
	return Py_None;

quit:
	return NULL;
}

/* Docstring attached to the module when it is created. */
static char parsers_doc[] = "Efficient content parsing.";

/* Functions exported by the module; the all-zero entry ends the table. */
static PyMethodDef methods[] = {
	{
		"parse_manifest",
		parse_manifest,
		METH_VARARGS,
		"parse a manifest\n"
	},
	{NULL, NULL, 0, NULL}
};

/*
 * Python 2 extension-module init function: creates the "parsers"
 * module with the method table and docstring defined above.  The
 * module object returned by Py_InitModule3() is not needed here.
 */
PyMODINIT_FUNC initparsers(void)
{
	(void)Py_InitModule3("parsers", methods, parsers_doc);
}