Mercurial > hg
annotate mercurial/cext/base85.c @ 51259:ed6683d4cb29
rust-index: implement faster retain heads using a vec instead of a hashset
This is the same optimization that the C index does, we're only catching up
now because this showed up as slow in benchmarking.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 29 Nov 2023 10:04:41 -0500 |
parents | b0dd39b91e7a |
children | 9367571fea21 |
rev | line source |
---|---|
3283 | 1 /* |
2 base85 codec | |
3 | |
4 Copyright 2006 Brendan Cully <brendan@kublai.com> | |
5 | |
6 This software may be used and distributed according to the terms of | |
7 the GNU General Public License, incorporated herein by reference. | |
8 | |
9 Largely based on git's implementation | |
10 */ | |
11 | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
12 #define PY_SSIZE_T_CLEAN |
3283 | 13 #include <Python.h> |
14 | |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
15 #include "util.h" |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
16 |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
/* The base85 alphabet: the character at index n encodes digit value n. */
static const char b85chars[] =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/*
 * Reverse lookup table indexed by byte value: holds (digit value + 1) for
 * bytes that belong to the alphabet and 0 for every other byte, so that
 * "b85dec[byte] - 1" is negative for invalid input.
 */
static char b85dec[256];

/*
 * Populate b85dec from b85chars. Must run before any decoding takes place.
 */
static void b85prep(void)
{
	unsigned i;

	memset(b85dec, 0, sizeof(b85dec));
	/*
	 * sizeof(b85chars) counts the string literal's terminating NUL.
	 * Stop one short of it; otherwise b85dec[0] would be set and a NUL
	 * byte in the input would be accepted as a digit with value 85.
	 */
	for (i = 0; i < sizeof(b85chars) - 1; i++) {
		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
	}
}
31 | |
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
32 static PyObject *b85encode(PyObject *self, PyObject *args) |
3283 | 33 { |
34 const unsigned char *text; | |
35 PyObject *out; | |
36 char *dst; | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
37 Py_ssize_t len, olen, i; |
3283 | 38 unsigned int acc, val, ch; |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
39 int pad = 0; |
3283 | 40 |
48821
b0dd39b91e7a
cext: remove PY23()
Gregory Szorc <gregory.szorc@gmail.com>
parents:
48810
diff
changeset
|
41 if (!PyArg_ParseTuple(args, "y#|i", &text, &len, &pad)) { |
3283 | 42 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
43 } |
3283 | 44 |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
45 if (pad) { |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
46 olen = ((len + 3) / 4 * 5) - 3; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
47 } else { |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
48 olen = len % 4; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
49 if (olen) { |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
50 olen++; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
51 } |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
52 olen += len / 4 * 5; |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
53 } |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
54 if (!(out = PyBytes_FromStringAndSize(NULL, olen + 3))) { |
3283 | 55 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
56 } |
3283 | 57 |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
58 dst = PyBytes_AsString(out); |
3283 | 59 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
60 while (len) { |
3283 | 61 acc = 0; |
62 for (i = 24; i >= 0; i -= 8) { | |
63 ch = *text++; | |
64 acc |= ch << i; | |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
65 if (--len == 0) { |
3283 | 66 break; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
67 } |
3283 | 68 } |
69 for (i = 4; i >= 0; i--) { | |
70 val = acc % 85; | |
71 acc /= 85; | |
72 dst[i] = b85chars[val]; | |
73 } | |
74 dst += 5; | |
75 } | |
76 | |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
77 if (!pad) { |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
78 _PyBytes_Resize(&out, olen); |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
79 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
80 |
3283 | 81 return out; |
82 } | |
83 | |
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
84 static PyObject *b85decode(PyObject *self, PyObject *args) |
3283 | 85 { |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
86 PyObject *out = NULL; |
3283 | 87 const char *text; |
88 char *dst; | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
89 Py_ssize_t len, i, j, olen, cap; |
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
90 int c; |
3283 | 91 unsigned int acc; |
92 | |
48821
b0dd39b91e7a
cext: remove PY23()
Gregory Szorc <gregory.szorc@gmail.com>
parents:
48810
diff
changeset
|
93 if (!PyArg_ParseTuple(args, "y#", &text, &len)) { |
3283 | 94 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
95 } |
3283 | 96 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
97 olen = len / 5 * 4; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
98 i = len % 5; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
99 if (i) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
100 olen += i - 1; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
101 } |
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
102 if (!(out = PyBytes_FromStringAndSize(NULL, olen))) { |
3283 | 103 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
104 } |
3283 | 105 |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
106 dst = PyBytes_AsString(out); |
3283 | 107 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
108 i = 0; |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
109 while (i < len) { |
3283 | 110 acc = 0; |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
111 cap = len - i - 1; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
112 if (cap > 4) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
113 cap = 4; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
114 } |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
115 for (j = 0; j < cap; i++, j++) { |
3283 | 116 c = b85dec[(int)*text++] - 1; |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
117 if (c < 0) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
118 PyErr_Format( |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
119 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
120 "bad base85 character at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
121 (int)i); |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
122 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
123 } |
3283 | 124 acc = acc * 85 + c; |
125 } | |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
126 if (i++ < len) { |
3283 | 127 c = b85dec[(int)*text++] - 1; |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
128 if (c < 0) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
129 PyErr_Format( |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
130 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
131 "bad base85 character at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
132 (int)i); |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
133 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
134 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
135 /* overflow detection: 0xffffffff == "|NsC0", |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
136 * "|NsC" == 0x03030303 */ |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
137 if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
138 PyErr_Format( |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
139 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
140 "bad base85 sequence at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
141 (int)i); |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
142 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
143 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
144 acc += c; |
3283 | 145 } |
146 | |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
147 cap = olen < 4 ? olen : 4; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
148 olen -= cap; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
149 for (j = 0; j < 4 - cap; j++) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
150 acc *= 85; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
151 } |
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
152 if (cap && cap < 4) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
153 acc += 0xffffff >> (cap - 1) * 8; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
154 } |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
155 for (j = 0; j < cap; j++) { |
3283 | 156 acc = (acc << 8) | (acc >> 24); |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
157 *dst++ = acc; |
3283 | 158 } |
159 } | |
160 | |
161 return out; | |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
162 bail: |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
163 Py_XDECREF(out); |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
164 return NULL; |
3283 | 165 } |
166 | |
167 static char base85_doc[] = "Base85 Data Encoding"; | |
168 | |
169 static PyMethodDef methods[] = { | |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
170 {"b85encode", b85encode, METH_VARARGS, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
171 "Encode text in base85.\n\n" |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
172 "If the second parameter is true, pad the result to a multiple of " |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
173 "five characters.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
174 {"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
175 {NULL, NULL}, |
3283 | 176 }; |
177 | |
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
178 static const int version = 1; |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
179 |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
180 static struct PyModuleDef base85_module = { |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
181 PyModuleDef_HEAD_INIT, "base85", base85_doc, -1, methods, |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
182 }; |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
183 |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
184 PyMODINIT_FUNC PyInit_base85(void) |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
185 { |
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
186 PyObject *m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
187 b85prep(); |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
188 |
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
189 m = PyModule_Create(&base85_module); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
190 PyModule_AddIntConstant(m, "version", version); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
191 return m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
192 } |