/*
 * mercurial/base85.c
 * author Brendan Cully <brendan@kublai.com>
 * Fri, 06 Oct 2006 13:01:54 -0700
 * changeset 3283 1f2c3983a6c5
 * child 3288 e93c926e069e
 * permissions -rw-r--r--
 * Add a base85 codec
 */

/*
 base85 codec

 Copyright 2006 Brendan Cully <brendan@kublai.com>

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.

 Largely based on git's implementation
*/

#include <Python.h>

/* The 85 digits of the alphabet in value order: b85chars[v] is the
 * character that encodes digit value v. */
static const char b85chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/* Reverse lookup filled in by b85prep(): for a byte that belongs to the
 * alphabet the entry is its digit value plus one; every other entry is 0,
 * so "entry - 1 < 0" identifies an invalid character. */
static char b85dec[256];

/*
 * Build the b85dec reverse-lookup table from b85chars.
 *
 * Must run before b85dec is consulted.  Calling it again simply rebuilds
 * the same table.
 */
static void
b85prep(void)
{
	unsigned int i;

	memset(b85dec, 0, sizeof(b85dec));

	/* sizeof(b85chars) also counts the string's terminating NUL.  Stop
	 * one short of it, otherwise byte 0 would be registered as an 86th
	 * digit and NUL bytes would be accepted as valid input. */
	for (i = 0; i < sizeof(b85chars) - 1; i++)
		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
}

/*
 * b85encode(text) -> str
 *
 * Encode a byte string in base85.  The input is consumed in groups of
 * four bytes, each read as a big-endian 32-bit number and written as five
 * characters from b85chars, most significant digit first.
 *
 * A final group shorter than four bytes is completed with zero bytes, so
 * the result length is always a multiple of five and the original length
 * is not recorded in the output.
 *
 * Returns a new string object, or NULL with an exception set when the
 * arguments cannot be parsed, the encoded size does not fit in an int,
 * or the result cannot be allocated.
 */
static PyObject *
b85encode(PyObject *self, PyObject *args)
{
	const unsigned char *text;
	PyObject *out;
	char *dst;
	int len, olen, i;
	unsigned int acc, val, ch;

	if (!PyArg_ParseTuple(args, "s#", &text, &len))
		return NULL;

	/* Every started 4-byte group produces 5 characters.  Reject inputs
	 * whose encoded size is not representable in an int: a wrapped size
	 * would allocate too small a buffer for the loop below. */
	if (len > INT_MAX / 5 * 4)
		return PyErr_NoMemory();

	/* Same value as (len + 3) / 4 * 5, without the overflowing add. */
	olen = len / 4 * 5 + (len % 4 ? 5 : 0);
	if (!(out = PyString_FromStringAndSize(NULL, olen)))
		return NULL;

	dst = PyString_AS_STRING(out);

	while (len)
	{
		/* Gather up to four bytes, first byte in the top bits; bytes
		 * missing from a short final group stay zero. */
		acc = 0;
		for (i = 24; i >= 0; i -= 8) {
			ch = *text++;
			acc |= ch << i;
			if (--len == 0)
				break;
		}
		/* Emit the five base-85 digits; the least significant one is
		 * produced first, so fill the group from the back. */
		for (i = 4; i >= 0; i--) {
			val = acc % 85;
			acc /= 85;
			dst[i] = b85chars[val];
		}
		dst += 5;
	}

	return out;
}

/*
 * b85decode(text) -> str
 *
 * Decode base85 text.  The input is consumed in groups of five
 * characters; each group is read as a base-85 number, most significant
 * digit first, and written as four bytes, most significant byte first.
 *
 * A final group shorter than five characters is decoded as if it had been
 * completed with '0' (the digit of value zero), so the result length is
 * always a multiple of four.
 *
 * Raises ValueError for a character outside the alphabet or for a group
 * whose value does not fit in 32 bits.  The position in the message is
 * the 1-based index of the offending 5-character group.
 *
 * Returns a new string object, or NULL with an exception set.
 */
static PyObject *
b85decode(PyObject *self, PyObject *args)
{
	PyObject *out;
	const char *text;
	char *dst;
	int len, i, j, olen, c;
	unsigned int acc;

	if (!PyArg_ParseTuple(args, "s#", &text, &len))
		return NULL;

	/* Four bytes per started group; same value as (len + 4) / 5 * 4 but
	 * cannot overflow for lengths close to INT_MAX. */
	olen = len / 5 * 4 + (len % 5 ? 4 : 0);
	if (!(out = PyString_FromStringAndSize(NULL, olen)))
		return NULL;

	dst = PyString_AS_STRING(out);

	for (i = 1; len > 0; i++)
	{
		/* Leading four digits.  len is only decremented when a
		 * character is actually consumed, so it can never go
		 * negative and the loop never reads past the input. */
		acc = 0;
		for (j = 0; j < 4; j++)
		{
			c = 0;
			if (len > 0)
			{
				len--;
				/* index as unsigned char: bytes >= 0x80 must
				 * not become negative table offsets */
				c = b85dec[(unsigned char)*text++] - 1;
				if (c < 0)
					goto badchar;
			}
			acc = acc * 85 + c;
		}

		/* Fifth digit, kept apart so that the final multiply and
		 * add can be checked for overflow. */
		c = 0;
		if (len > 0)
		{
			len--;
			c = b85dec[(unsigned char)*text++] - 1;
			if (c < 0)
				goto badchar;
		}

		/* overflow detection: 0xffffffff == "|NsC0",
		 * "|NsC" == 0x03030303 */
		if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c)
			goto badseq;

		acc += c;

		/* Rotate left by one byte each round so the bytes come out
		 * most significant first. */
		for (j = 0; j < 4; j++)
		{
			acc = (acc << 8) | (acc >> 24);
			*dst++ = (char)acc;
		}
	}

	return out;

	/* Error exits: drop the partially filled result before raising. */
badchar:
	Py_DECREF(out);
	return PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i);

badseq:
	Py_DECREF(out);
	return PyErr_Format(PyExc_ValueError, "Bad base85 sequence at position %d", i);
}

/* Docstring passed to Py_InitModule3 for the module object. */
static char base85_doc[] = "Base85 Data Encoding";

/* Functions exported by the module.  Both take positional arguments only;
 * the all-zero entry terminates the table. */
static PyMethodDef methods[] = {
	{"b85encode", b85encode, METH_VARARGS,
	 "encode text in base85\n"},
	{"b85decode", b85decode, METH_VARARGS,
	 "decode base85 text\n"},
	{NULL, NULL, 0, NULL}
};

/*
 * Module initialisation: fill the decoding table and create the "base85"
 * module with the functions listed in methods and the base85_doc
 * docstring.
 */
PyMODINIT_FUNC initbase85(void)
{
	/* The table only depends on b85chars, so build it up front. */
	b85prep();

	Py_InitModule3("base85", methods, base85_doc);
}