Mercurial > hg
annotate mercurial/cext/charencode.h @ 33926:f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Most strings are ASCII. Let's optimize for it.
Using uint64_t is slightly faster than uint32_t on 64bit system, but there
isn't huge difference.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Sun, 23 Apr 2017 12:59:42 +0900 |
parents | 2c37f9dabc32 |
children | 81199632fa42 |
rev | line source |
---|---|
11358
4494fb02d549
util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff
changeset
|
1 /* |
33758
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
2 charencode.h - miscellaneous character encoding |
11358
4494fb02d549
util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff
changeset
|
3 |
4494fb02d549
util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff
changeset
|
4 This software may be used and distributed according to the terms of |
4494fb02d549
util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff
changeset
|
5 the GNU General Public License, incorporated herein by reference. |
4494fb02d549
util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff
changeset
|
6 */ |
4494fb02d549
util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff
changeset
|
7 |
33758
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
8 #ifndef _HG_CHARENCODE_H_ |
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
9 #define _HG_CHARENCODE_H_ |
11358
4494fb02d549
util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff
changeset
|
10 |
33758
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
11 #include <Python.h> |
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
12 #include "compat.h" |
21809
e250b8300e6e
parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents:
19833
diff
changeset
|
13 |
24608
1c533e23ce95
util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents:
24443
diff
changeset
|
14 /* This should be kept in sync with normcasespecs in encoding.py. */ |
1c533e23ce95
util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents:
24443
diff
changeset
|
15 enum normcase_spec { |
1c533e23ce95
util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents:
24443
diff
changeset
|
16 NORMCASE_LOWER = -1, |
1c533e23ce95
util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents:
24443
diff
changeset
|
17 NORMCASE_UPPER = 1, |
1c533e23ce95
util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents:
24443
diff
changeset
|
18 NORMCASE_OTHER = 0 |
1c533e23ce95
util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents:
24443
diff
changeset
|
19 }; |
1c533e23ce95
util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents:
24443
diff
changeset
|
20 |
33759
a22339d389d4
cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents:
33758
diff
changeset
|
21 PyObject *unhexlify(const char *str, Py_ssize_t len); |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
22 PyObject *isasciistr(PyObject *self, PyObject *args); |
33758
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
23 PyObject *asciilower(PyObject *self, PyObject *args); |
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
24 PyObject *asciiupper(PyObject *self, PyObject *args); |
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
25 PyObject *make_file_foldmap(PyObject *self, PyObject *args); |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
26 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args); |
33756
5866ba5e9c48
cext: move _dict_new_presized() to header
Yuya Nishihara <yuya@tcha.org>
parents:
32386
diff
changeset
|
27 |
32386
7640584e697c
cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents:
32385
diff
changeset
|
28 static const int8_t hextable[256] = { |
32378
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
32 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */ |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
33 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */ |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
35 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */ |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
36 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
37 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
38 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
39 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
43 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
45 }; |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
46 |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
47 static inline int hexdigit(const char *p, Py_ssize_t off) |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
48 { |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
49 int8_t val = hextable[(unsigned char)p[off]]; |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
50 |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
51 if (val >= 0) { |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
52 return val; |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
53 } |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
54 |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
55 PyErr_SetString(PyExc_ValueError, "input contains non-hex character"); |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
56 return 0; |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
57 } |
7d0c69505a66
cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents:
30112
diff
changeset
|
58 |
33758
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
59 #endif /* _HG_CHARENCODE_H_ */ |