mercurial/base85.c
author Daniel Holth <dholth@fastmail.fm>
Wed, 16 May 2007 01:10:12 -0400
changeset 4448 af013ae3ca10
parent 3369 4bad632913d8
child 7190 aecea6934fdd
permissions -rw-r--r--
use documented convert-repo interface
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
3283
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
     1
/*
 base85 codec

 Copyright 2006 Brendan Cully <brendan@kublai.com>

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.

 Largely based on git's implementation
*/
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    11
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    12
#include <Python.h>
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    13
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    14
/* The 85-character alphabet; a digit's value is its index in this string. */
static const char b85chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/*
 * Reverse lookup table, indexed by byte value.  An entry holds the digit
 * value plus one, so that 0 marks a byte that is not in the alphabet.
 */
static char b85dec[256];

/*
 * Fill in b85dec from b85chars.  Must run before b85dec is consulted.
 */
static void
b85prep(void)
{
	size_t i;

	memset(b85dec, 0, sizeof(b85dec));
	/*
	 * Stop before the string's terminating NUL: sizeof() counts it, and
	 * mapping it would make byte 0 look like a valid 86th digit.
	 */
	for (i = 0; i < sizeof(b85chars) - 1; i++)
		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    27
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    28
/*
 * b85encode(text[, pad]) -> str
 *
 * Encode a byte string in base85.  Each group of four input bytes is packed
 * into a 32-bit value, first byte most significant, and written as five
 * alphabet characters, most significant digit first.  A short final group is
 * encoded as if the missing bytes were zero.
 *
 * When pad is false (the default), a final group of r < 4 bytes contributes
 * only r + 1 characters to the result.  When pad is true, all five characters
 * of every group are kept.
 *
 * Returns a new string object, or NULL with an exception set on failure.
 */
static PyObject *
b85encode(PyObject *self, PyObject *args)
{
	const unsigned char *text;
	PyObject *out;
	char *dst;
	int len, olen, i;
	unsigned int acc, val, ch;
	int pad = 0;

	if (!PyArg_ParseTuple(args, "s#|i", &text, &len, &pad))
		return NULL;

	/*
	 * The size arithmetic below is done in int and may reach
	 * len / 4 * 5 + 7; reject inputs for which that would overflow.
	 */
	if (len / 4 > (INT_MAX - 7) / 5) {
		PyErr_SetString(PyExc_OverflowError,
				"base85 input too large to encode");
		return NULL;
	}

	if (pad)
		olen = ((len + 3) / 4 * 5) - 3;
	else {
		olen = len % 4;
		if (olen)
			olen++;
		olen += len / 4 * 5;
	}
	/*
	 * The loop below always writes whole five-character groups;
	 * olen + 3 is large enough for that in both modes.
	 */
	if (!(out = PyString_FromStringAndSize(NULL, olen + 3)))
		return NULL;

	dst = PyString_AS_STRING(out);

	while (len) {
		/* gather up to four bytes; a short tail leaves the low bits 0 */
		acc = 0;
		for (i = 24; i >= 0; i -= 8) {
			ch = *text++;
			acc |= ch << i;
			if (--len == 0)
				break;
		}
		/* emit five base85 digits, least significant one last */
		for (i = 4; i >= 0; i--) {
			val = acc % 85;
			acc /= 85;
			dst[i] = b85chars[val];
		}
		dst += 5;
	}

	/*
	 * Unpadded output: cut the string back to olen characters.  On
	 * failure _PyString_Resize releases the string, sets out to NULL
	 * and raises, so just propagate the error.
	 */
	if (!pad && _PyString_Resize(&out, olen) < 0)
		return NULL;

	return out;
}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    75
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
    76
/*
 * b85decode(text) -> str
 *
 * Decode base85 text.  Every five input characters yield four output bytes;
 * a trailing group of k characters (1 <= k < 5) yields k - 1 bytes.
 *
 * Raises ValueError for a character outside the alphabet or for a
 * five-character group whose value does not fit in 32 bits.
 *
 * Returns a new string object, or NULL with an exception set on failure.
 * NOTE(review): the byte extraction below rotates acc by 8 within 32 bits,
 * so it assumes unsigned int is exactly 32 bits wide.
 */
static PyObject *
b85decode(PyObject *self, PyObject *args)
{
	PyObject *out;
	const char *text;
	char *dst;
	int len, i, j, olen, c, cap;
	unsigned int acc;

	if (!PyArg_ParseTuple(args, "s#", &text, &len))
		return NULL;

	olen = len / 5 * 4;
	i = len % 5;
	if (i)
		olen += i - 1;
	if (!(out = PyString_FromStringAndSize(NULL, olen)))
		return NULL;

	dst = PyString_AS_STRING(out);

	i = 0;
	while (i < len)
	{
		acc = 0;
		/* all digits of this group except its last one */
		cap = len - i - 1;
		if (cap > 4)
			cap = 4;
		for (j = 0; j < cap; i++, j++)
		{
			/* index as unsigned char: bytes >= 0x80 must not
			 * turn into a negative table offset */
			c = b85dec[(unsigned char)*text++] - 1;
			if (c < 0) {
				PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i);
				goto bail;
			}
			acc = acc * 85 + c;
		}
		/* last digit of the group, with a range check */
		if (i++ < len)
		{
			c = b85dec[(unsigned char)*text++] - 1;
			if (c < 0) {
				PyErr_Format(PyExc_ValueError, "Bad base85 character at position %d", i);
				goto bail;
			}
			/* overflow detection: 0xffffffff == "|NsC0",
			 * "|NsC" == 0x03030303 */
			if (acc > 0x03030303) {
				PyErr_Format(PyExc_ValueError, "Bad base85 sequence at position %d", i);
				goto bail;
			}
			acc *= 85;
			if (acc > 0xffffffff - c) {
				PyErr_Format(PyExc_ValueError, "Bad base85 sequence at position %d", i);
				goto bail;
			}
			acc += c;
		}

		/* number of output bytes this group produces */
		cap = olen < 4 ? olen : 4;
		olen -= cap;
		/* scale a short group up as if the missing digits were 0 */
		for (j = 0; j < 4 - cap; j++)
			acc *= 85;
		/* presumably rounds up to make up for the digits the encoder
		 * dropped, so the leading cap bytes come out exact */
		if (cap && cap < 4)
			acc += 0xffffff >> (cap - 1) * 8;
		/* rotate the next most significant byte into the low 8 bits
		 * and store it */
		for (j = 0; j < cap; j++)
		{
			acc = (acc << 8) | (acc >> 24);
			*dst++ = acc;
		}
	}

	return out;

bail:
	/* an exception is already set; drop the half-filled result */
	Py_DECREF(out);
	return NULL;
}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   138
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   139
static char base85_doc[] = "Base85 Data Encoding";
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   140
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   141
static PyMethodDef methods[] = {
3288
e93c926e069e Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents: 3283
diff changeset
   142
	{"b85encode", b85encode, METH_VARARGS,
e93c926e069e Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents: 3283
diff changeset
   143
         "Encode text in base85.\n\n"
e93c926e069e Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents: 3283
diff changeset
   144
         "If the second parameter is true, pad the result to a multiple of "
e93c926e069e Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents: 3283
diff changeset
   145
         "five characters.\n"},
e93c926e069e Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents: 3283
diff changeset
   146
	{"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"},
3283
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   147
	{NULL, NULL}
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   148
};
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   149
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   150
PyMODINIT_FUNC initbase85(void)
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   151
{
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   152
	Py_InitModule3("base85", methods, base85_doc);
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   153
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   154
	b85prep();
1f2c3983a6c5 Add a base85 codec
Brendan Cully <brendan@kublai.com>
parents:
diff changeset
   155
}