author | Valentin Gatien-Baron <vgatien-baron@janestreet.com> |
Tue, 25 Sep 2018 16:03:14 -0400 | |
branch | stable |
changeset 39799 | f28812c708a5 |
parent 39420 | 91477b123700 |
child 41336 | 763b45bc4483 |
permissions | -rw-r--r-- |
3283 | 1 |
/*
 base85 codec

 Copyright 2006 Brendan Cully <brendan@kublai.com>

 This software may be used and distributed according to the terms of
 the GNU General Public License, incorporated herein by reference.

 Largely based on git's implementation
*/
|
11 |
||
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
12 |
#define PY_SSIZE_T_CLEAN |
3283 | 13 |
#include <Python.h> |
14 |
||
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
15 |
#include "util.h" |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
16 |
|
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
17 |
/* The 85 digits of the alphabet in value order: a digit's value is its index. */
static const char b85chars[] =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/*
 * Reverse lookup table filled in by b85prep(). For a byte value b,
 * b85dec[b] holds the digit value plus one, or 0 when b is not a base85
 * digit.
 */
static char b85dec[256];

/*
 * Build b85dec from b85chars. Must run before b85dec is consulted.
 */
static void b85prep(void)
{
	unsigned i;

	memset(b85dec, 0, sizeof(b85dec));
	/*
	 * sizeof(b85chars) counts the string's terminating NUL. Stop before
	 * it, otherwise byte 0 would be registered as a digit of value 85.
	 */
	for (i = 0; i < sizeof(b85chars) - 1; i++)
		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
}
|
30 |
||
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
31 |
/*
 * b85encode(text[, pad]) -> bytes
 *
 * Encode a byte string in base85. Every group of up to four input bytes
 * is packed big-endian into a 32-bit accumulator (missing bytes are
 * zero) and written out as five base85 digits.
 *
 * When pad is false (the default) the result is trimmed so that a
 * trailing group of n < 4 bytes contributes only n + 1 characters.
 * When pad is true the full five characters of the last group are kept,
 * so the result length is a multiple of five.
 *
 * Returns a new bytes object, or NULL with an exception set on failure.
 */
static PyObject *b85encode(PyObject *self, PyObject *args)
{
	const unsigned char *text;
	PyObject *out;
	char *dst;
	Py_ssize_t len, olen, i;
	unsigned int acc, val, ch;
	int pad = 0;

	if (!PyArg_ParseTuple(args, PY23("s#|i", "y#|i"), &text, &len, &pad))
		return NULL;

	/*
	 * The size computations below multiply by 5/4 and add a small
	 * constant; refuse inputs for which that would overflow Py_ssize_t
	 * (signed overflow is undefined behaviour).
	 */
	if (len > (PY_SSIZE_T_MAX - 8) / 5 * 4)
		return PyErr_NoMemory();

	if (pad)
		olen = ((len + 3) / 4 * 5) - 3;
	else {
		olen = len % 4;
		if (olen)
			olen++;
		olen += len / 4 * 5;
	}
	/*
	 * Allocate olen + 3 bytes: the loop always emits five characters per
	 * group, so the last (possibly partial) group needs the extra room.
	 * In pad mode olen + 3 is exactly the padded length.
	 */
	if (!(out = PyBytes_FromStringAndSize(NULL, olen + 3)))
		return NULL;

	dst = PyBytes_AsString(out);

	while (len) {
		acc = 0;
		/* gather up to four bytes, most significant first */
		for (i = 24; i >= 0; i -= 8) {
			ch = *text++;
			acc |= ch << i;
			if (--len == 0)
				break;
		}
		/* emit five digits, least significant digit last */
		for (i = 4; i >= 0; i--) {
			val = acc % 85;
			acc /= 85;
			dst[i] = b85chars[val];
		}
		dst += 5;
	}

	/*
	 * On failure _PyBytes_Resize releases the object, sets out to NULL
	 * and raises MemoryError.
	 */
	if (!pad && _PyBytes_Resize(&out, olen) < 0)
		return NULL;

	return out;
}
|
77 |
||
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
78 |
/*
 * b85decode(text) -> bytes
 *
 * Decode base85 text. Input is consumed in groups of up to five
 * characters, each full group yielding four bytes; a trailing group of
 * n < 5 characters yields n - 1 bytes.
 *
 * Raises ValueError when a byte that is not a base85 digit is found, or
 * when a group's value does not fit in 32 bits. Returns a new bytes
 * object, or NULL with an exception set on failure.
 */
static PyObject *b85decode(PyObject *self, PyObject *args)
{
	PyObject *out = NULL;
	const char *text;
	char *dst;
	Py_ssize_t len, i, j, olen, cap;
	int c;
	unsigned int acc;

	if (!PyArg_ParseTuple(args, PY23("s#", "y#"), &text, &len))
		return NULL;

	olen = len / 5 * 4;
	i = len % 5;
	if (i)
		olen += i - 1;
	if (!(out = PyBytes_FromStringAndSize(NULL, olen)))
		return NULL;

	dst = PyBytes_AsString(out);

	i = 0;
	while (i < len) {
		acc = 0;
		/* all characters of the group except the last one */
		cap = len - i - 1;
		if (cap > 4)
			cap = 4;
		for (j = 0; j < cap; i++, j++) {
			/*
			 * Index through unsigned char: plain char may be
			 * signed, and a byte >= 0x80 would then produce a
			 * negative index and read outside b85dec.
			 */
			c = b85dec[(unsigned char)*text++] - 1;
			if (c < 0) {
				PyErr_Format(
				    PyExc_ValueError,
				    "bad base85 character at position %d",
				    (int)i);
				goto bail;
			}
			acc = acc * 85 + c;
		}
		/* last character of the group, with overflow checking */
		if (i++ < len) {
			c = b85dec[(unsigned char)*text++] - 1;
			if (c < 0) {
				PyErr_Format(
				    PyExc_ValueError,
				    "bad base85 character at position %d",
				    (int)i);
				goto bail;
			}
			/* overflow detection: 0xffffffff == "|NsC0",
			 * "|NsC" == 0x03030303 */
			if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) {
				PyErr_Format(
				    PyExc_ValueError,
				    "bad base85 sequence at position %d",
				    (int)i);
				goto bail;
			}
			acc += c;
		}

		/* number of output bytes this group contributes */
		cap = olen < 4 ? olen : 4;
		olen -= cap;
		/* scale a short final group up to a full five-digit value */
		for (j = 0; j < 4 - cap; j++)
			acc *= 85;
		if (cap && cap < 4)
			acc += 0xffffff >> (cap - 1) * 8;
		/* rotate left by one byte each step to emit MSB first */
		for (j = 0; j < cap; j++) {
			acc = (acc << 8) | (acc >> 24);
			*dst++ = acc;
		}
	}

	return out;
bail:
	Py_XDECREF(out);
	return NULL;
}
154 |
||
155 |
static char base85_doc[] = "Base85 Data Encoding"; |
|
156 |
||
157 |
static PyMethodDef methods[] = { |
|
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
158 |
{"b85encode", b85encode, METH_VARARGS, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
159 |
"Encode text in base85.\n\n" |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
160 |
"If the second parameter is true, pad the result to a multiple of " |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
161 |
"five characters.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
162 |
{"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
163 |
{NULL, NULL}, |
3283 | 164 |
}; |
165 |
||
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
166 |
static const int version = 1; |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
167 |
|
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
168 |
#ifdef IS_PY3K |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
169 |
static struct PyModuleDef base85_module = { |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
170 |
PyModuleDef_HEAD_INIT, "base85", base85_doc, -1, methods, |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
171 |
}; |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
172 |
|
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
173 |
PyMODINIT_FUNC PyInit_base85(void) |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
174 |
{ |
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
175 |
PyObject *m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
176 |
b85prep(); |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
177 |
|
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
178 |
m = PyModule_Create(&base85_module); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
179 |
PyModule_AddIntConstant(m, "version", version); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
180 |
return m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
181 |
} |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
182 |
#else |
3283 | 183 |
PyMODINIT_FUNC initbase85(void) |
184 |
{ |
|
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
185 |
PyObject *m; |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
186 |
m = Py_InitModule3("base85", methods, base85_doc); |
3283 | 187 |
|
188 |
b85prep(); |
|
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
189 |
PyModule_AddIntConstant(m, "version", version); |
3283 | 190 |
} |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
191 |
#endif |