author | Martin von Zweigbergk <martinvonz@google.com> |
Mon, 03 Dec 2018 11:14:44 -0800 | |
branch | stable |
changeset 40803 | 44c2e80db985 |
parent 36620 | 186c6df3a373 |
child 41336 | 763b45bc4483 |
permissions | -rw-r--r-- |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
1 |
/* |
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
2 |
charencode.c - miscellaneous character encoding |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
3 |
|
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
4 |
Copyright 2008 Matt Mackall <mpm@selenic.com> and others |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
5 |
|
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
6 |
This software may be used and distributed according to the terms of |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
7 |
the GNU General Public License, incorporated herein by reference. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
8 |
*/ |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
9 |
|
33759
a22339d389d4
cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents:
33758
diff
changeset
|
10 |
#define PY_SSIZE_T_CLEAN |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
11 |
#include <Python.h> |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
12 |
#include <assert.h> |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
13 |
|
33758
0f4ac3b6dee4
cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents:
33757
diff
changeset
|
14 |
#include "charencode.h" |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
15 |
#include "compat.h" |
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
16 |
#include "util.h" |
20742
3681de20b0a7
parsers: fail fast if Python has wrong minor version (issue4110)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents:
20555
diff
changeset
|
17 |
|
33812
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
18 |
#ifdef IS_PY3K |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
19 |
/* The mapping of Python types is meant to be temporary to get Python |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
20 |
* 3 to compile. We should remove this once Python 3 support is fully |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
21 |
* supported and proper types are used in the extensions themselves. */ |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
22 |
#define PyInt_Type PyLong_Type |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
23 |
#define PyInt_AS_LONG PyLong_AS_LONG |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
24 |
#endif |
4ba863c88135
cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents:
33759
diff
changeset
|
25 |
|
34860
d0912876d7a7
parsers: protect some case-folding tables from clang-format
Augie Fackler <raf@durin42.com>
parents:
34439
diff
changeset
|
26 |
/* clang-format off */ |
32386
7640584e697c
cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents:
32384
diff
changeset
|
27 |
static const char lowertable[128] = { |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
28 |
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
29 |
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
30 |
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
31 |
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
32 |
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
33 |
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
34 |
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
35 |
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
36 |
'\x40', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
37 |
'\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
38 |
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
39 |
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
40 |
'\x78', '\x79', '\x7a', /* X-Z */ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
41 |
'\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
42 |
'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
43 |
'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
44 |
'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
45 |
'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
46 |
}; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
47 |
|
32386
7640584e697c
cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents:
32384
diff
changeset
|
48 |
static const char uppertable[128] = { |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
49 |
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
50 |
'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
51 |
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
52 |
'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
53 |
'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
54 |
'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
55 |
'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
56 |
'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
57 |
'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
58 |
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
59 |
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
60 |
'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
61 |
'\x60', |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
62 |
'\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
63 |
'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
64 |
'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
65 |
'\x58', '\x59', '\x5a', /* x-z */ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
66 |
'\x7b', '\x7c', '\x7d', '\x7e', '\x7f' |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
67 |
}; |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
68 |
|
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
69 |
/* 1: no escape, 2: \<c>, 6: \u<x> */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
70 |
static const uint8_t jsonlentable[256] = { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
71 |
6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
72 |
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
73 |
1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
74 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
75 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
76 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
77 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
78 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
79 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
80 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
81 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
82 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
83 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
84 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
85 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
86 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
87 |
}; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
88 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
89 |
static const uint8_t jsonparanoidlentable[128] = { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
90 |
6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
91 |
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
92 |
1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
93 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
94 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
95 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
96 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
97 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
98 |
}; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
99 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
100 |
static const char hexchartable[16] = { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
101 |
'0', '1', '2', '3', '4', '5', '6', '7', |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
102 |
'8', '9', 'a', 'b', 'c', 'd', 'e', 'f', |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
103 |
}; |
36058
81199632fa42
charencode: adjust clang-format enable/disable comments
Augie Fackler <augie@google.com>
parents:
34860
diff
changeset
|
104 |
/* clang-format on */ |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
105 |
|
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
106 |
/* |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
107 |
* Turn a hex-encoded string into binary. |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
108 |
*/ |
33759
a22339d389d4
cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents:
33758
diff
changeset
|
109 |
PyObject *unhexlify(const char *str, Py_ssize_t len) |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
110 |
{ |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
111 |
PyObject *ret; |
6395
3f0294536b24
fix const annotation warning
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents:
6389
diff
changeset
|
112 |
char *d; |
33759
a22339d389d4
cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents:
33758
diff
changeset
|
113 |
Py_ssize_t i; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
114 |
|
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
115 |
ret = PyBytes_FromStringAndSize(NULL, len / 2); |
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
116 |
|
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
117 |
if (!ret) |
7092
fb3fc27617a2
parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents:
7091
diff
changeset
|
118 |
return NULL; |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
119 |
|
11361
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
120 |
d = PyBytes_AsString(ret); |
3de3d670d2b6
parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents:
10449
diff
changeset
|
121 |
|
16617
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
122 |
for (i = 0; i < len;) { |
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
123 |
int hi = hexdigit(str, i++); |
4fb16743049d
parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents:
16616
diff
changeset
|
124 |
int lo = hexdigit(str, i++); |
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
125 |
*d++ = (hi << 4) | lo; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
126 |
} |
7091
12b35ae03365
parsers: clean up whitespace
Matt Mackall <mpm@selenic.com>
parents:
6395
diff
changeset
|
127 |
|
6389
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
128 |
return ret; |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
129 |
} |
0231f763ebc8
manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff
changeset
|
130 |
|
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
131 |
PyObject *isasciistr(PyObject *self, PyObject *args) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
132 |
{ |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
133 |
const char *buf; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
134 |
Py_ssize_t i, len; |
36620
186c6df3a373
py3: bulk-replace 'const char*' format specifier passed to PyArg_ParseTuple*()
Yuya Nishihara <yuya@tcha.org>
parents:
36225
diff
changeset
|
135 |
if (!PyArg_ParseTuple(args, PY23("s#:isasciistr", "y#:isasciistr"), |
186c6df3a373
py3: bulk-replace 'const char*' format specifier passed to PyArg_ParseTuple*()
Yuya Nishihara <yuya@tcha.org>
parents:
36225
diff
changeset
|
136 |
&buf, &len)) |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
137 |
return NULL; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
138 |
i = 0; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
139 |
/* char array in PyStringObject should be at least 4-byte aligned */ |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
140 |
if (((uintptr_t)buf & 3) == 0) { |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
141 |
const uint32_t *p = (const uint32_t *)buf; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
142 |
for (; i < len / 4; i++) { |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
143 |
if (p[i] & 0x80808080U) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
144 |
Py_RETURN_FALSE; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
145 |
} |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
146 |
i *= 4; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
147 |
} |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
148 |
for (; i < len; i++) { |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
149 |
if (buf[i] & 0x80) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
150 |
Py_RETURN_FALSE; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
151 |
} |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
152 |
Py_RETURN_TRUE; |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
153 |
} |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33925
diff
changeset
|
154 |
|
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
155 |
static inline PyObject * |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
156 |
_asciitransform(PyObject *str_obj, const char table[128], PyObject *fallback_fn) |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
157 |
{ |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
158 |
char *str, *newstr; |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
159 |
Py_ssize_t i, len; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
160 |
PyObject *newobj = NULL; |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
161 |
PyObject *ret = NULL; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
162 |
|
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
163 |
str = PyBytes_AS_STRING(str_obj); |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
164 |
len = PyBytes_GET_SIZE(str_obj); |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
165 |
|
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
166 |
newobj = PyBytes_FromStringAndSize(NULL, len); |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
167 |
if (!newobj) |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
168 |
goto quit; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
169 |
|
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
170 |
newstr = PyBytes_AS_STRING(newobj); |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
171 |
|
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
172 |
for (i = 0; i < len; i++) { |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
173 |
char c = str[i]; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
174 |
if (c & 0x80) { |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
175 |
if (fallback_fn != NULL) { |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
176 |
ret = PyObject_CallFunctionObjArgs( |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
177 |
fallback_fn, str_obj, NULL); |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
178 |
} else { |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
179 |
PyObject *err = PyUnicodeDecodeError_Create( |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
180 |
"ascii", str, len, i, (i + 1), |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
181 |
"unexpected code byte"); |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
182 |
PyErr_SetObject(PyExc_UnicodeDecodeError, err); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
183 |
Py_XDECREF(err); |
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
184 |
} |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
185 |
goto quit; |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
186 |
} |
24576
fe173106e7fe
parsers: make _asciilower a generic _asciitransform function
Siddharth Agarwal <sid0@fb.com>
parents:
24575
diff
changeset
|
187 |
newstr[i] = table[(unsigned char)c]; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
188 |
} |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
189 |
|
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
190 |
ret = newobj; |
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
191 |
Py_INCREF(ret); |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
192 |
quit: |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
193 |
Py_XDECREF(newobj); |
24575
a62e957413f7
parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents:
24574
diff
changeset
|
194 |
return ret; |
22778
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
195 |
} |
80f2b63dd83a
parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents:
22604
diff
changeset
|
196 |
|
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
197 |
PyObject *asciilower(PyObject *self, PyObject *args) |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
198 |
{ |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
199 |
PyObject *str_obj; |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
200 |
if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
201 |
return NULL; |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
202 |
return _asciitransform(str_obj, lowertable, NULL); |
24574
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
203 |
} |
e97a00bf18ae
parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents:
24499
diff
changeset
|
204 |
|
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
205 |
PyObject *asciiupper(PyObject *self, PyObject *args) |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
206 |
{ |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
207 |
PyObject *str_obj; |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
208 |
if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
209 |
return NULL; |
24606
e4a733c34bc6
parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents:
24577
diff
changeset
|
210 |
return _asciitransform(str_obj, uppertable, NULL); |
24577
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
211 |
} |
bf55df007535
parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents:
24576
diff
changeset
|
212 |
|
33757
e9996bd7203f
cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents:
33756
diff
changeset
|
213 |
PyObject *make_file_foldmap(PyObject *self, PyObject *args) |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
214 |
{ |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
215 |
PyObject *dmap, *spec_obj, *normcase_fallback; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
216 |
PyObject *file_foldmap = NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
217 |
enum normcase_spec spec; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
218 |
PyObject *k, *v; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
219 |
dirstateTupleObject *tuple; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
220 |
Py_ssize_t pos = 0; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
221 |
const char *table; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
222 |
|
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
223 |
if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", &PyDict_Type, |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
224 |
&dmap, &PyInt_Type, &spec_obj, &PyFunction_Type, |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
225 |
&normcase_fallback)) |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
226 |
goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
227 |
|
24622
1e05f11619bb
parsers.c: avoid implicit conversion loses integer precision warning
André Sintzoff <andre.sintzoff@gmail.com>
parents:
24609
diff
changeset
|
228 |
spec = (int)PyInt_AS_LONG(spec_obj); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
229 |
switch (spec) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
230 |
case NORMCASE_LOWER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
231 |
table = lowertable; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
232 |
break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
233 |
case NORMCASE_UPPER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
234 |
table = uppertable; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
235 |
break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
236 |
case NORMCASE_OTHER: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
237 |
table = NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
238 |
break; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
239 |
default: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
240 |
PyErr_SetString(PyExc_TypeError, "invalid normcasespec"); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
241 |
goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
242 |
} |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
243 |
|
25583
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
244 |
/* Add some more entries to deal with additions outside this |
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
245 |
function. */ |
ce64c9ab19f2
parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents:
25582
diff
changeset
|
246 |
file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
247 |
if (file_foldmap == NULL) |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
248 |
goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
249 |
|
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
250 |
while (PyDict_Next(dmap, &pos, &k, &v)) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
251 |
if (!dirstate_tuple_check(v)) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
252 |
PyErr_SetString(PyExc_TypeError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
253 |
"expected a dirstate tuple"); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
254 |
goto quit; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
255 |
} |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
256 |
|
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
257 |
tuple = (dirstateTupleObject *)v; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
258 |
if (tuple->state != 'r') { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
259 |
PyObject *normed; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
260 |
if (table != NULL) { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
261 |
normed = _asciitransform(k, table, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
262 |
normcase_fallback); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
263 |
} else { |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
264 |
normed = PyObject_CallFunctionObjArgs( |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
265 |
normcase_fallback, k, NULL); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
266 |
} |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
267 |
|
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
268 |
if (normed == NULL) |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
269 |
goto quit; |
26049
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
270 |
if (PyDict_SetItem(file_foldmap, normed, k) == -1) { |
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
271 |
Py_DECREF(normed); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
272 |
goto quit; |
26049
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
273 |
} |
b1634b7804c7
parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents:
26048
diff
changeset
|
274 |
Py_DECREF(normed); |
24609
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
275 |
} |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
276 |
} |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
277 |
return file_foldmap; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
278 |
quit: |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
279 |
Py_XDECREF(file_foldmap); |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
280 |
return NULL; |
670aaee7931c
parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents:
24606
diff
changeset
|
281 |
} |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
282 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
283 |
/* calculate length of JSON-escaped string; returns -1 if unsupported */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
284 |
static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid) |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
285 |
{ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
286 |
Py_ssize_t i, esclen = 0; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
287 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
288 |
if (paranoid) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
289 |
/* don't want to process multi-byte escapes in C */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
290 |
for (i = 0; i < len; i++) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
291 |
char c = buf[i]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
292 |
if (c & 0x80) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
293 |
PyErr_SetString(PyExc_ValueError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
294 |
"cannot process non-ascii str"); |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
295 |
return -1; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
296 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
297 |
esclen += jsonparanoidlentable[(unsigned char)c]; |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
298 |
if (esclen < 0) { |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
299 |
PyErr_SetString(PyExc_MemoryError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
300 |
"overflow in jsonescapelen"); |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
301 |
return -1; |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
302 |
} |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
303 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
304 |
} else { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
305 |
for (i = 0; i < len; i++) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
306 |
char c = buf[i]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
307 |
esclen += jsonlentable[(unsigned char)c]; |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
308 |
if (esclen < 0) { |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
309 |
PyErr_SetString(PyExc_MemoryError, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
310 |
"overflow in jsonescapelen"); |
34030
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
311 |
return -1; |
e97be042fa1b
encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
312 |
} |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
313 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
314 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
315 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
316 |
return esclen; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
317 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
318 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
319 |
/* map '\<c>' escape character */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
320 |
static char jsonescapechar2(char c) |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
321 |
{ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
322 |
switch (c) { |
34439
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
323 |
case '\b': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
324 |
return 'b'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
325 |
case '\t': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
326 |
return 't'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
327 |
case '\n': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
328 |
return 'n'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
329 |
case '\f': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
330 |
return 'f'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
331 |
case '\r': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
332 |
return 'r'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
333 |
case '"': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
334 |
return '"'; |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
335 |
case '\\': |
88e83a618de0
cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents:
34030
diff
changeset
|
336 |
return '\\'; |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
337 |
} |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
338 |
return '\0'; /* should not happen */ |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
339 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
340 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
341 |
/* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
342 |
include characters mappable by json(paranoid)lentable */ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
343 |
static void encodejsonescape(char *escbuf, Py_ssize_t esclen, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
344 |
const char *origbuf, Py_ssize_t origlen, |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
345 |
bool paranoid) |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
346 |
{ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
347 |
const uint8_t *lentable = |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
348 |
(paranoid) ? jsonparanoidlentable : jsonlentable; |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
349 |
Py_ssize_t i, j; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
350 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
351 |
for (i = 0, j = 0; i < origlen; i++) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
352 |
char c = origbuf[i]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
353 |
uint8_t l = lentable[(unsigned char)c]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
354 |
assert(j + l <= esclen); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
355 |
switch (l) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
356 |
case 1: |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
357 |
escbuf[j] = c; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
358 |
break; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
359 |
case 2: |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
360 |
escbuf[j] = '\\'; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
361 |
escbuf[j + 1] = jsonescapechar2(c); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
362 |
break; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
363 |
case 6: |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
364 |
memcpy(escbuf + j, "\\u00", 4); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
365 |
escbuf[j + 4] = hexchartable[(unsigned char)c >> 4]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
366 |
escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf]; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
367 |
break; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
368 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
369 |
j += l; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
370 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
371 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
372 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
373 |
PyObject *jsonescapeu8fast(PyObject *self, PyObject *args) |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
374 |
{ |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
375 |
PyObject *origstr, *escstr; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
376 |
const char *origbuf; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
377 |
Py_ssize_t origlen, esclen; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
378 |
int paranoid; |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
379 |
if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", &PyBytes_Type, |
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
380 |
&origstr, ¶noid)) |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
381 |
return NULL; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
382 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
383 |
origbuf = PyBytes_AS_STRING(origstr); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
384 |
origlen = PyBytes_GET_SIZE(origstr); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
385 |
esclen = jsonescapelen(origbuf, origlen, paranoid); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
386 |
if (esclen < 0) |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
387 |
return NULL; /* unsupported char found or overflow */ |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
388 |
if (origlen == esclen) { |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
389 |
Py_INCREF(origstr); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
390 |
return origstr; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
391 |
} |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
392 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
393 |
escstr = PyBytes_FromStringAndSize(NULL, esclen); |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
394 |
if (!escstr) |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
395 |
return NULL; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
396 |
encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen, |
36225
6c87d4113a90
charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents:
36058
diff
changeset
|
397 |
paranoid); |
33925
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
398 |
|
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
399 |
return escstr; |
2c37f9dabc32
encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents:
33812
diff
changeset
|
400 |
} |