Mercurial > hg
annotate mercurial/cext/charencode.c @ 45238:bd7515273fd6
commitctx: gather more preparation code within the lock context
This is a small change that exist mostly for clarification. I am about to move a
large amount of code in its own function. having all that code next to each
other will make the next changeset clearer.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Thu, 23 Jul 2020 23:58:23 +0200 |
parents | 763b45bc4483 |
children | d4ba4d51f85f |
rev | line source |
---|---|
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
1 /* |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
2 charencode.c - miscellaneous character encoding |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
3 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
4 Copyright 2008 Matt Mackall <mpm@selenic.com> and others |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
5 |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
6 This software may be used and distributed according to the terms of |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
7 the GNU General Public License, incorporated herein by reference. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
8 */ |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
9 |
33759
a22339d389d4
cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents:
33758
diff
changeset
|
10 #define PY_SSIZE_T_CLEAN |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
11 #include <Python.h> |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
12 #include <assert.h> |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
13 |
33758
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33757
diff
changeset
|
14 #include "charencode.h" |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
15 #include "compat.h" |
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
16 #include "util.h" |
20742
3681de20b0a7
parsers: fail fast if Python has wrong minor version (issue4110)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20555
diff
changeset
|
17 |
33812
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
18 #ifdef IS_PY3K |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
19 /* The mapping of Python types is meant to be temporary to get Python |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
20 * 3 to compile. We should remove this once Python 3 support is fully |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
21 * supported and proper types are used in the extensions themselves. */ |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
22 #define PyInt_Type PyLong_Type |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
23 #define PyInt_AS_LONG PyLong_AS_LONG |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
24 #endif |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
25 |
34860
d0912876d7a7
parsers: protect some case-folding tables from clang-format
Augie Fackler <raf@durin42.com>
parents:
34439
diff
changeset
|
26 /* clang-format off */ |
32386
7640584e697c
cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents:
32384
diff
changeset
|
27 static const char lowertable[128] = { |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
28 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
29 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
30 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
31 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
32 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
33 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
34 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
35 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
36 '\x40', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
37 '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
38 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
39 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
40 '\x78', '\x79', '\x7a', /* X-Z */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
41 '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
42 '\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
43 '\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
44 '\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
45 '\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
46 }; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
47 |
32386
7640584e697c
cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents:
32384
diff
changeset
|
48 static const char uppertable[128] = { |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
49 '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
50 '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
51 '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
52 '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
53 '\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
54 '\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
55 '\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
56 '\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
57 '\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
58 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
59 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
60 '\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
61 '\x60', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
62 '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
63 '\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
64 '\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
65 '\x58', '\x59', '\x5a', /* x-z */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
66 '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
67 }; |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
68 |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
69 /* 1: no escape, 2: \<c>, 6: \u<x> */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
70 static const uint8_t jsonlentable[256] = { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
71 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
72 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
73 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
74 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
75 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
76 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
77 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
78 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
79 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
80 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
81 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
82 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
83 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
84 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
85 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
86 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
87 }; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
88 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
89 static const uint8_t jsonparanoidlentable[128] = { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
90 6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
91 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
92 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
93 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
94 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
95 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
96 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
97 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
98 }; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
99 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
100 static const char hexchartable[16] = { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
101 '0', '1', '2', '3', '4', '5', '6', '7', |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
102 '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
103 }; |
36058
81199632fa42
charencode: adjust clang-format enable/disable comments
Augie Fackler <augie@google.com>
parents:
34860
diff
changeset
|
104 /* clang-format on */ |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
105 |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
106 /* |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
107 * Turn a hex-encoded string into binary. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
108 */ |
33759
a22339d389d4
cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents:
33758
diff
changeset
|
109 PyObject *unhexlify(const char *str, Py_ssize_t len) |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
110 { |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
111 PyObject *ret; |
6395
3f0294536b24
fix const annotation warning
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
6389
diff
changeset
|
112 char *d; |
33759
a22339d389d4
cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents:
33758
diff
changeset
|
113 Py_ssize_t i; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
114 |
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
115 ret = PyBytes_FromStringAndSize(NULL, len / 2); |
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
116 |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
117 if (!ret) { |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
118 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
119 } |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
120 |
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
121 d = PyBytes_AsString(ret); |
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
122 |
16617
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
123 for (i = 0; i < len;) { |
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
124 int hi = hexdigit(str, i++); |
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
125 int lo = hexdigit(str, i++); |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
126 *d++ = (hi << 4) | lo; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
127 } |
7091
12b35ae03365
parsers: clean up whitespace
Matt Mackall <mpm@selenic.com>
parents:
6395
diff
changeset
|
128 |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
129 return ret; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
130 } |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
131 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
132 PyObject *isasciistr(PyObject *self, PyObject *args) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
133 { |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
134 const char *buf; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
135 Py_ssize_t i, len; |
36620
186c6df3a373
py3: bulk-replace 'const char*' format specifier passed to PyArg_ParseTuple*()
Yuya Nishihara <yuya@tcha.org>
parents:
36225
diff
changeset
|
136 if (!PyArg_ParseTuple(args, PY23("s#:isasciistr", "y#:isasciistr"), |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
137 &buf, &len)) { |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
138 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
139 } |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
140 i = 0; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
141 /* char array in PyStringObject should be at least 4-byte aligned */ |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
142 if (((uintptr_t)buf & 3) == 0) { |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
143 const uint32_t *p = (const uint32_t *)buf; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
144 for (; i < len / 4; i++) { |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
145 if (p[i] & 0x80808080U) { |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
146 Py_RETURN_FALSE; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
147 } |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
148 } |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
149 i *= 4; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
150 } |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
151 for (; i < len; i++) { |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
152 if (buf[i] & 0x80) { |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
153 Py_RETURN_FALSE; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
154 } |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
155 } |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
156 Py_RETURN_TRUE; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
157 } |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
158 |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
159 static inline PyObject * |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
160 _asciitransform(PyObject *str_obj, const char table[128], PyObject *fallback_fn) |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
161 { |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
162 char *str, *newstr; |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
163 Py_ssize_t i, len; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
164 PyObject *newobj = NULL; |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
165 PyObject *ret = NULL; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
166 |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
167 str = PyBytes_AS_STRING(str_obj); |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
168 len = PyBytes_GET_SIZE(str_obj); |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
169 |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
170 newobj = PyBytes_FromStringAndSize(NULL, len); |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
171 if (!newobj) { |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
172 goto quit; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
173 } |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
174 |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
175 newstr = PyBytes_AS_STRING(newobj); |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
176 |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
177 for (i = 0; i < len; i++) { |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
178 char c = str[i]; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
179 if (c & 0x80) { |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
180 if (fallback_fn != NULL) { |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
181 ret = PyObject_CallFunctionObjArgs( |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
182 fallback_fn, str_obj, NULL); |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
183 } else { |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
184 PyObject *err = PyUnicodeDecodeError_Create( |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
185 "ascii", str, len, i, (i + 1), |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
186 "unexpected code byte"); |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
187 PyErr_SetObject(PyExc_UnicodeDecodeError, err); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
188 Py_XDECREF(err); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
189 } |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
190 goto quit; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
191 } |
24576
fe173106e7fe
parsers: make _asciilower a generic _asciitransform function
Siddharth Agarwal <sid0@fb.com>
parents:
24575
diff
changeset
|
192 newstr[i] = table[(unsigned char)c]; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
193 } |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
194 |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
195 ret = newobj; |
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
196 Py_INCREF(ret); |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
197 quit: |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
198 Py_XDECREF(newobj); |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
199 return ret; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
200 } |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
201 |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
202 PyObject *asciilower(PyObject *self, PyObject *args) |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
203 { |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
204 PyObject *str_obj; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
205 if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) { |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
206 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
207 } |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
208 return _asciitransform(str_obj, lowertable, NULL); |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
209 } |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
210 |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
211 PyObject *asciiupper(PyObject *self, PyObject *args) |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
212 { |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
213 PyObject *str_obj; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
214 if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) { |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
215 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
216 } |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
217 return _asciitransform(str_obj, uppertable, NULL); |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
218 } |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
219 |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
220 PyObject *make_file_foldmap(PyObject *self, PyObject *args) |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
221 { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
222 PyObject *dmap, *spec_obj, *normcase_fallback; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
223 PyObject *file_foldmap = NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
224 enum normcase_spec spec; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
225 PyObject *k, *v; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
226 dirstateTupleObject *tuple; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
227 Py_ssize_t pos = 0; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
228 const char *table; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
229 |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
230 if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", &PyDict_Type, |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
231 &dmap, &PyInt_Type, &spec_obj, &PyFunction_Type, |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
232 &normcase_fallback)) { |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
233 goto quit; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
234 } |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
235 |
24622
1e05f11619bb
parsers.c: avoid implicit conversion loses integer precision warning
André Sintzoff <andre.sintzoff@gmail.com>
parents:
24609
diff
changeset
|
236 spec = (int)PyInt_AS_LONG(spec_obj); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
237 switch (spec) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
238 case NORMCASE_LOWER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
239 table = lowertable; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
240 break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
241 case NORMCASE_UPPER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
242 table = uppertable; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
243 break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
244 case NORMCASE_OTHER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
245 table = NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
246 break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
247 default: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
248 PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
249 goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
250 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
251 |
25583
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
252 /* Add some more entries to deal with additions outside this |
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
253 function. */ |
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
254 file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
255 if (file_foldmap == NULL) { |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
256 goto quit; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
257 } |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
258 |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
259 while (PyDict_Next(dmap, &pos, &k, &v)) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
260 if (!dirstate_tuple_check(v)) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
261 PyErr_SetString(PyExc_TypeError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
262 "expected a dirstate tuple"); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
263 goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
264 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
265 |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
266 tuple = (dirstateTupleObject *)v; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
267 if (tuple->state != 'r') { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
268 PyObject *normed; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
269 if (table != NULL) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
270 normed = _asciitransform(k, table, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
271 normcase_fallback); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
272 } else { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
273 normed = PyObject_CallFunctionObjArgs( |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
274 normcase_fallback, k, NULL); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
275 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
276 |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
277 if (normed == NULL) { |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
278 goto quit; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
279 } |
26049
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
280 if (PyDict_SetItem(file_foldmap, normed, k) == -1) { |
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
281 Py_DECREF(normed); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
282 goto quit; |
26049
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
283 } |
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
284 Py_DECREF(normed); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
285 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
286 } |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
287 return file_foldmap; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
288 quit: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
289 Py_XDECREF(file_foldmap); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
290 return NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
291 } |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
292 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
293 /* calculate length of JSON-escaped string; returns -1 if unsupported */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
294 static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid) |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
295 { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
296 Py_ssize_t i, esclen = 0; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
297 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
298 if (paranoid) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
299 /* don't want to process multi-byte escapes in C */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
300 for (i = 0; i < len; i++) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
301 char c = buf[i]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
302 if (c & 0x80) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
303 PyErr_SetString(PyExc_ValueError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
304 "cannot process non-ascii str"); |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
305 return -1; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
306 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
307 esclen += jsonparanoidlentable[(unsigned char)c]; |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
308 if (esclen < 0) { |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
309 PyErr_SetString(PyExc_MemoryError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
310 "overflow in jsonescapelen"); |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
311 return -1; |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
312 } |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
313 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
314 } else { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
315 for (i = 0; i < len; i++) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
316 char c = buf[i]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
317 esclen += jsonlentable[(unsigned char)c]; |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
318 if (esclen < 0) { |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
319 PyErr_SetString(PyExc_MemoryError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
320 "overflow in jsonescapelen"); |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
321 return -1; |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
322 } |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
323 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
324 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
325 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
326 return esclen; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
327 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
328 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
329 /* map '\<c>' escape character */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
330 static char jsonescapechar2(char c) |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
331 { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
332 switch (c) { |
34439
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
333 case '\b': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
334 return 'b'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
335 case '\t': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
336 return 't'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
337 case '\n': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
338 return 'n'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
339 case '\f': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
340 return 'f'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
341 case '\r': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
342 return 'r'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
343 case '"': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
344 return '"'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
345 case '\\': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
346 return '\\'; |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
347 } |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
348 return '\0'; /* should not happen */ |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
349 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
350 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
351 /* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
352 include characters mappable by json(paranoid)lentable */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
353 static void encodejsonescape(char *escbuf, Py_ssize_t esclen, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
354 const char *origbuf, Py_ssize_t origlen, |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
355 bool paranoid) |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
356 { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
357 const uint8_t *lentable = |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
358 (paranoid) ? jsonparanoidlentable : jsonlentable; |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
359 Py_ssize_t i, j; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
360 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
361 for (i = 0, j = 0; i < origlen; i++) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
362 char c = origbuf[i]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
363 uint8_t l = lentable[(unsigned char)c]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
364 assert(j + l <= esclen); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
365 switch (l) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
366 case 1: |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
367 escbuf[j] = c; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
368 break; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
369 case 2: |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
370 escbuf[j] = '\\'; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
371 escbuf[j + 1] = jsonescapechar2(c); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
372 break; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
373 case 6: |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
374 memcpy(escbuf + j, "\\u00", 4); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
375 escbuf[j + 4] = hexchartable[(unsigned char)c >> 4]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
376 escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
377 break; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
378 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
379 j += l; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
380 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
381 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
382 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
383 PyObject *jsonescapeu8fast(PyObject *self, PyObject *args) |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
384 { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
385 PyObject *origstr, *escstr; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
386 const char *origbuf; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
387 Py_ssize_t origlen, esclen; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
388 int paranoid; |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
389 if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", &PyBytes_Type, |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
390 &origstr, ¶noid)) { |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
391 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
392 } |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
393 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
394 origbuf = PyBytes_AS_STRING(origstr); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
395 origlen = PyBytes_GET_SIZE(origstr); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
396 esclen = jsonescapelen(origbuf, origlen, paranoid); |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
397 if (esclen < 0) { |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
398 return NULL; /* unsupported char found or overflow */ |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
399 } |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
400 if (origlen == esclen) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
401 Py_INCREF(origstr); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
402 return origstr; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
403 } |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
404 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
405 escstr = PyBytes_FromStringAndSize(NULL, esclen); |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
406 if (!escstr) { |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
407 return NULL; |
41336
763b45bc4483
cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents:
36620
diff
changeset
|
408 } |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
409 encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
410 paranoid); |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
411 |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
412 return escstr; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
413 } |