Mercurial > hg
annotate mercurial/cext/base85.c @ 52167:7346f93be7a4
revlog: add the glue to use the Rust `InnerRevlog` from Python
The performance of this has been looked at for quite some time, and some
workflows are actually quite a bit faster than with the Python + C code.
However, we are still (up to 20%) slower in some crucial places like cloning
certain repos, log, cat, which makes this an incomplete rewrite. This is
mostly due to the high amount of overhead in Python <-> Rust FFI, especially
around the VFS code. A future patch series will rewrite the VFS code in
pure Rust, which should hopefully get us up to par with current performance,
if not better in all important cases.
This is a "save state" of sorts, as this is a ton of code, and I don't want
to pile up even more things in a single review.
Continuing to try to match the current performance will take an extremely
long time, if it's not impossible, without the aforementioned VFS work.
author | Raphaël Gomès <rgomes@octobus.net> |
---|---|
date | Wed, 19 Jun 2024 19:10:49 +0200 |
parents | 9367571fea21 |
children |
rev | line source |
---|---|
3283 | 1 /* |
2 base85 codec | |
3 | |
4 Copyright 2006 Brendan Cully <brendan@kublai.com> | |
5 | |
6 This software may be used and distributed according to the terms of | |
7 the GNU General Public License, incorporated herein by reference. | |
8 | |
9 Largely based on git's implementation | |
10 */ | |
11 | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
12 #define PY_SSIZE_T_CLEAN |
3283 | 13 #include <Python.h> |
14 | |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
15 #include "util.h" |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
16 |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
/* The 85 digits of the encoding, ordered by digit value (0..84). */
static const char b85chars[] =
    "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~";

/*
 * Reverse lookup table filled in by b85prep(): for a byte value b,
 * b85dec[b] holds (digit value + 1), or 0 when b is not a base85 digit.
 */
static char b85dec[256];

/*
 * Populate b85dec from b85chars.
 *
 * Only the 85 real digits are mapped. sizeof(b85chars) also counts the
 * string literal's terminating NUL, so the loop stops one short of it;
 * otherwise byte 0 would be registered as a digit with value 85.
 */
static void b85prep(void)
{
	unsigned i;

	memset(b85dec, 0, sizeof(b85dec));
	for (i = 0; i < sizeof(b85chars) - 1; i++) {
		/* index as unsigned char so the subscript is never negative */
		b85dec[(unsigned char)b85chars[i]] = (char)(i + 1);
	}
}
31 | |
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
32 static PyObject *b85encode(PyObject *self, PyObject *args) |
3283 | 33 { |
34 const unsigned char *text; | |
35 PyObject *out; | |
36 char *dst; | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
37 Py_ssize_t len, olen, i; |
3283 | 38 unsigned int acc, val, ch; |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
39 int pad = 0; |
3283 | 40 |
51723
9367571fea21
cext: correct the argument handling of `b85encode()`
Matt Harbison <matt_harbison@yahoo.com>
parents:
48821
diff
changeset
|
41 if (!PyArg_ParseTuple(args, "y#|p", &text, &len, &pad)) { |
3283 | 42 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
43 } |
3283 | 44 |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
45 if (pad) { |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
46 olen = ((len + 3) / 4 * 5) - 3; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
47 } else { |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
48 olen = len % 4; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
49 if (olen) { |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
50 olen++; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
51 } |
7190
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
52 olen += len / 4 * 5; |
aecea6934fdd
Some additional space/tab cleanups
Thomas Arendsen Hein <thomas@intevation.de>
parents:
3369
diff
changeset
|
53 } |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
54 if (!(out = PyBytes_FromStringAndSize(NULL, olen + 3))) { |
3283 | 55 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
56 } |
3283 | 57 |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
58 dst = PyBytes_AsString(out); |
3283 | 59 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
60 while (len) { |
3283 | 61 acc = 0; |
62 for (i = 24; i >= 0; i -= 8) { | |
63 ch = *text++; | |
64 acc |= ch << i; | |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
65 if (--len == 0) { |
3283 | 66 break; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
67 } |
3283 | 68 } |
69 for (i = 4; i >= 0; i--) { | |
70 val = acc % 85; | |
71 acc /= 85; | |
72 dst[i] = b85chars[val]; | |
73 } | |
74 dst += 5; | |
75 } | |
76 | |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
77 if (!pad) { |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
78 _PyBytes_Resize(&out, olen); |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
79 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
80 |
3283 | 81 return out; |
82 } | |
83 | |
27060
4613a89bea42
base85: clean up function definition style
Augie Fackler <augie@google.com>
parents:
26074
diff
changeset
|
84 static PyObject *b85decode(PyObject *self, PyObject *args) |
3283 | 85 { |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
86 PyObject *out = NULL; |
3283 | 87 const char *text; |
88 char *dst; | |
16837
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
89 Py_ssize_t len, i, j, olen, cap; |
1b9d54c00d50
base85: use Py_ssize_t for string lengths
Adrian Buehlmann <adrian@cadifra.com>
parents:
16522
diff
changeset
|
90 int c; |
3283 | 91 unsigned int acc; |
92 | |
48821
b0dd39b91e7a
cext: remove PY23()
Gregory Szorc <gregory.szorc@gmail.com>
parents:
48810
diff
changeset
|
93 if (!PyArg_ParseTuple(args, "y#", &text, &len)) { |
3283 | 94 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
95 } |
3283 | 96 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
97 olen = len / 5 * 4; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
98 i = len % 5; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
99 if (i) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
100 olen += i - 1; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
101 } |
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
102 if (!(out = PyBytes_FromStringAndSize(NULL, olen))) { |
3283 | 103 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
104 } |
3283 | 105 |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
106 dst = PyBytes_AsString(out); |
3283 | 107 |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
108 i = 0; |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
109 while (i < len) { |
3283 | 110 acc = 0; |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
111 cap = len - i - 1; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
112 if (cap > 4) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
113 cap = 4; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
114 } |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
115 for (j = 0; j < cap; i++, j++) { |
3283 | 116 c = b85dec[(int)*text++] - 1; |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
117 if (c < 0) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
118 PyErr_Format( |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
119 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
120 "bad base85 character at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
121 (int)i); |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
122 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
123 } |
3283 | 124 acc = acc * 85 + c; |
125 } | |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
126 if (i++ < len) { |
3283 | 127 c = b85dec[(int)*text++] - 1; |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
128 if (c < 0) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
129 PyErr_Format( |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
130 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
131 "bad base85 character at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
132 (int)i); |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
133 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
134 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
135 /* overflow detection: 0xffffffff == "|NsC0", |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
136 * "|NsC" == 0x03030303 */ |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
137 if (acc > 0x03030303 || (acc *= 85) > 0xffffffff - c) { |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
138 PyErr_Format( |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
139 PyExc_ValueError, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
140 "bad base85 sequence at position %d", |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
141 (int)i); |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
142 goto bail; |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
143 } |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
144 acc += c; |
3283 | 145 } |
146 | |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
147 cap = olen < 4 ? olen : 4; |
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
148 olen -= cap; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
149 for (j = 0; j < 4 - cap; j++) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
150 acc *= 85; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
151 } |
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
152 if (cap && cap < 4) { |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
153 acc += 0xffffff >> (cap - 1) * 8; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
39420
diff
changeset
|
154 } |
34437
ce26a13869fb
cext: move braces for control statements to same line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
32368
diff
changeset
|
155 for (j = 0; j < cap; j++) { |
3283 | 156 acc = (acc << 8) | (acc >> 24); |
3288
e93c926e069e
Handle odd-sized base85 input and output
Brendan Cully <brendan@kublai.com>
parents:
3283
diff
changeset
|
157 *dst++ = acc; |
3283 | 158 } |
159 } | |
160 | |
161 return out; | |
39420
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
162 bail: |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
163 Py_XDECREF(out); |
91477b123700
base85: fix leak on error return from b85decode()
Yuya Nishihara <yuya@tcha.org>
parents:
36620
diff
changeset
|
164 return NULL; |
3283 | 165 } |
166 | |
167 static char base85_doc[] = "Base85 Data Encoding"; | |
168 | |
169 static PyMethodDef methods[] = { | |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
170 {"b85encode", b85encode, METH_VARARGS, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
171 "Encode text in base85.\n\n" |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
172 "If the second parameter is true, pad the result to a multiple of " |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
173 "five characters.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
174 {"b85decode", b85decode, METH_VARARGS, "Decode base85 text.\n"}, |
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
175 {NULL, NULL}, |
3283 | 176 }; |
177 | |
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
178 static const int version = 1; |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
179 |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
180 static struct PyModuleDef base85_module = { |
36226
e1138fc2c4e2
base85: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
34437
diff
changeset
|
181 PyModuleDef_HEAD_INIT, "base85", base85_doc, -1, methods, |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
182 }; |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
183 |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
184 PyMODINIT_FUNC PyInit_base85(void) |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
185 { |
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
186 PyObject *m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
187 b85prep(); |
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
188 |
32356
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
189 m = PyModule_Create(&base85_module); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
190 PyModule_AddIntConstant(m, "version", version); |
7948adb53e28
base85: add version to help detect breaking binary changes
Jun Wu <quark@fb.com>
parents:
27060
diff
changeset
|
191 return m; |
11362
f42ef9493fa9
base85.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10282
diff
changeset
|
192 } |