annotate mercurial/cext/charencode.h @ 33926:f4433f2713d0

encoding: add function to test if a str consists of ASCII characters Most strings are ASCII. Let's optimize for it. Using uint64_t is slightly faster than uint32_t on 64bit system, but there isn't huge difference.
author Yuya Nishihara <yuya@tcha.org>
date Sun, 23 Apr 2017 12:59:42 +0900
parents 2c37f9dabc32
children 81199632fa42
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11358
4494fb02d549 util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
1 /*
33758
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
2 charencode.h - miscellaneous character encoding
11358
4494fb02d549 util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
3
4494fb02d549 util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
4 This software may be used and distributed according to the terms of
4494fb02d549 util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
5 the GNU General Public License, incorporated herein by reference.
4494fb02d549 util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
6 */
4494fb02d549 util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
7
33758
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
8 #ifndef _HG_CHARENCODE_H_
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
9 #define _HG_CHARENCODE_H_
11358
4494fb02d549 util.h: Utility macros for handling different Python APIs.
Renato Cunha <renatoc@gmail.com>
parents:
diff changeset
10
33758
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
11 #include <Python.h>
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
12 #include "compat.h"
21809
e250b8300e6e parsers: inline fields of dirstate values in C version
Siddharth Agarwal <sid0@fb.com>
parents: 19833
diff changeset
13
24608
1c533e23ce95 util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents: 24443
diff changeset
14 /* This should be kept in sync with normcasespecs in encoding.py. */
1c533e23ce95 util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents: 24443
diff changeset
15 enum normcase_spec {
1c533e23ce95 util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents: 24443
diff changeset
16 NORMCASE_LOWER = -1,
1c533e23ce95 util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents: 24443
diff changeset
17 NORMCASE_UPPER = 1,
1c533e23ce95 util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents: 24443
diff changeset
18 NORMCASE_OTHER = 0
1c533e23ce95 util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents: 24443
diff changeset
19 };
1c533e23ce95 util.h: define an enum for normcase specs
Siddharth Agarwal <sid0@fb.com>
parents: 24443
diff changeset
20
33759
a22339d389d4 cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents: 33758
diff changeset
21 PyObject *unhexlify(const char *str, Py_ssize_t len);
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
22 PyObject *isasciistr(PyObject *self, PyObject *args);
33758
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
23 PyObject *asciilower(PyObject *self, PyObject *args);
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
24 PyObject *asciiupper(PyObject *self, PyObject *args);
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
25 PyObject *make_file_foldmap(PyObject *self, PyObject *args);
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
26 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args);
33756
5866ba5e9c48 cext: move _dict_new_presized() to header
Yuya Nishihara <yuya@tcha.org>
parents: 32386
diff changeset
27
32386
7640584e697c cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents: 32385
diff changeset
28 static const int8_t hextable[256] = {
32378
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
29 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
30 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
31 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
32 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, /* 0-9 */
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
33 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* A-F */
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
34 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
35 -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* a-f */
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
36 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
37 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
38 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
39 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
40 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
41 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
42 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
43 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
44 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
45 };
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
46
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
47 static inline int hexdigit(const char *p, Py_ssize_t off)
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
48 {
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
49 int8_t val = hextable[(unsigned char)p[off]];
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
50
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
51 if (val >= 0) {
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
52 return val;
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
53 }
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
54
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
55 PyErr_SetString(PyExc_ValueError, "input contains non-hex character");
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
56 return 0;
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
57 }
7d0c69505a66 cext: extract revlog/index parsing code to own C file
Gregory Szorc <gregory.szorc@gmail.com>
parents: 30112
diff changeset
58
33758
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
59 #endif /* _HG_CHARENCODE_H_ */