mercurial/cext/charencode.c
author Pierre-Yves David <pierre-yves.david@octobus.net>
Wed, 13 Nov 2019 20:42:13 +0100
changeset 43786 421ea5772039
parent 41336 763b45bc4483
child 46819 d4ba4d51f85f
permissions -rw-r--r--
copies: split the combination of the copies mapping in its own function In some case, this part take up to 95% of the copy tracing that take about a hundred second. This poor performance comes from the fact we keep duplciating and merging dictionary that are mostly similar. I want to experiment with smarter native code to do this, so I need to isolate the function first.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     1
/*
33757
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
     2
 charencode.c - miscellaneous character encoding
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     3
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     4
 Copyright 2008 Matt Mackall <mpm@selenic.com> and others
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     5
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     8
*/
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     9
33759
a22339d389d4 cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents: 33758
diff changeset
    10
#define PY_SSIZE_T_CLEAN
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
    11
#include <Python.h>
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    12
#include <assert.h>
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
    13
33758
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33757
diff changeset
    14
#include "charencode.h"
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
    15
#include "compat.h"
11361
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
    16
#include "util.h"
20742
3681de20b0a7 parsers: fail fast if Python has wrong minor version (issue4110)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents: 20555
diff changeset
    17
33812
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    18
#ifdef IS_PY3K
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    19
/* The mapping of Python types is meant to be temporary to get Python
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    20
 * 3 to compile. We should remove this once Python 3 support is fully
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    21
 * supported and proper types are used in the extensions themselves. */
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    22
#define PyInt_Type PyLong_Type
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    23
#define PyInt_AS_LONG PyLong_AS_LONG
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    24
#endif
4ba863c88135 cext: move PyInt macros to charencode.c properly
Yuya Nishihara <yuya@tcha.org>
parents: 33759
diff changeset
    25
34860
d0912876d7a7 parsers: protect some case-folding tables from clang-format
Augie Fackler <raf@durin42.com>
parents: 34439
diff changeset
    26
/* clang-format off */
32386
7640584e697c cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents: 32384
diff changeset
    27
static const char lowertable[128] = {
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    28
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    29
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    30
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    31
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    32
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    33
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    34
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    35
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    36
	'\x40',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    37
	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    38
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    39
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    40
	'\x78', '\x79', '\x7a',                                         /* X-Z */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    41
	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    42
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    43
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    44
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    45
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    46
};
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    47
32386
7640584e697c cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents: 32384
diff changeset
    48
static const char uppertable[128] = {
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    49
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    50
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    51
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    52
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    53
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    54
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    55
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    56
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    57
	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    58
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    59
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    60
	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    61
	'\x60',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    62
		'\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    63
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    64
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    65
	'\x58', '\x59', '\x5a', 					/* x-z */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    66
				'\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    67
};
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    68
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    69
/* 1: no escape, 2: \<c>, 6: \u<x> */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    70
static const uint8_t jsonlentable[256] = {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    71
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    72
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    73
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    74
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    75
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    76
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    77
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    78
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    79
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    80
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    81
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    82
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    83
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    84
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    85
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    86
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    87
};
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    88
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    89
static const uint8_t jsonparanoidlentable[128] = {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    90
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    91
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    92
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    93
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    94
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    95
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    96
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    97
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    98
};
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
    99
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   100
static const char hexchartable[16] = {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   101
	'0', '1', '2', '3', '4', '5', '6', '7',
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   102
	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   103
};
36058
81199632fa42 charencode: adjust clang-format enable/disable comments
Augie Fackler <augie@google.com>
parents: 34860
diff changeset
   104
/* clang-format on */
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   105
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   106
/*
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   107
 * Turn a hex-encoded string into binary.
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   108
 */
33759
a22339d389d4 cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents: 33758
diff changeset
   109
PyObject *unhexlify(const char *str, Py_ssize_t len)
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   110
{
7092
fb3fc27617a2 parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents: 7091
diff changeset
   111
	PyObject *ret;
6395
3f0294536b24 fix const annotation warning
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 6389
diff changeset
   112
	char *d;
33759
a22339d389d4 cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents: 33758
diff changeset
   113
	Py_ssize_t i;
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   114
11361
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   115
	ret = PyBytes_FromStringAndSize(NULL, len / 2);
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   116
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   117
	if (!ret) {
7092
fb3fc27617a2 parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents: 7091
diff changeset
   118
		return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   119
	}
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   120
11361
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   121
	d = PyBytes_AsString(ret);
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   122
16617
4fb16743049d parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents: 16616
diff changeset
   123
	for (i = 0; i < len;) {
4fb16743049d parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents: 16616
diff changeset
   124
		int hi = hexdigit(str, i++);
4fb16743049d parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents: 16616
diff changeset
   125
		int lo = hexdigit(str, i++);
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   126
		*d++ = (hi << 4) | lo;
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   127
	}
7091
12b35ae03365 parsers: clean up whitespace
Matt Mackall <mpm@selenic.com>
parents: 6395
diff changeset
   128
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   129
	return ret;
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   130
}
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   131
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   132
PyObject *isasciistr(PyObject *self, PyObject *args)
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   133
{
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   134
	const char *buf;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   135
	Py_ssize_t i, len;
36620
186c6df3a373 py3: bulk-replace 'const char*' format specifier passed to PyArg_ParseTuple*()
Yuya Nishihara <yuya@tcha.org>
parents: 36225
diff changeset
   136
	if (!PyArg_ParseTuple(args, PY23("s#:isasciistr", "y#:isasciistr"),
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   137
	                      &buf, &len)) {
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   138
		return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   139
	}
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   140
	i = 0;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   141
	/* char array in PyStringObject should be at least 4-byte aligned */
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   142
	if (((uintptr_t)buf & 3) == 0) {
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   143
		const uint32_t *p = (const uint32_t *)buf;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   144
		for (; i < len / 4; i++) {
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   145
			if (p[i] & 0x80808080U) {
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   146
				Py_RETURN_FALSE;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   147
			}
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   148
		}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   149
		i *= 4;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   150
	}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   151
	for (; i < len; i++) {
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   152
		if (buf[i] & 0x80) {
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   153
			Py_RETURN_FALSE;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   154
		}
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   155
	}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   156
	Py_RETURN_TRUE;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   157
}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33925
diff changeset
   158
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   159
static inline PyObject *
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   160
_asciitransform(PyObject *str_obj, const char table[128], PyObject *fallback_fn)
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   161
{
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   162
	char *str, *newstr;
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   163
	Py_ssize_t i, len;
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   164
	PyObject *newobj = NULL;
24575
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   165
	PyObject *ret = NULL;
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   166
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   167
	str = PyBytes_AS_STRING(str_obj);
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   168
	len = PyBytes_GET_SIZE(str_obj);
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   169
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   170
	newobj = PyBytes_FromStringAndSize(NULL, len);
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   171
	if (!newobj) {
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   172
		goto quit;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   173
	}
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   174
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   175
	newstr = PyBytes_AS_STRING(newobj);
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   176
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   177
	for (i = 0; i < len; i++) {
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   178
		char c = str[i];
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   179
		if (c & 0x80) {
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   180
			if (fallback_fn != NULL) {
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   181
				ret = PyObject_CallFunctionObjArgs(
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   182
				    fallback_fn, str_obj, NULL);
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   183
			} else {
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   184
				PyObject *err = PyUnicodeDecodeError_Create(
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   185
				    "ascii", str, len, i, (i + 1),
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   186
				    "unexpected code byte");
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   187
				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   188
				Py_XDECREF(err);
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   189
			}
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   190
			goto quit;
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   191
		}
24576
fe173106e7fe parsers: make _asciilower a generic _asciitransform function
Siddharth Agarwal <sid0@fb.com>
parents: 24575
diff changeset
   192
		newstr[i] = table[(unsigned char)c];
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   193
	}
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   194
24575
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   195
	ret = newobj;
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   196
	Py_INCREF(ret);
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   197
quit:
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   198
	Py_XDECREF(newobj);
24575
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   199
	return ret;
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   200
}
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   201
33757
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
   202
PyObject *asciilower(PyObject *self, PyObject *args)
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   203
{
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   204
	PyObject *str_obj;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   205
	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) {
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   206
		return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   207
	}
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   208
	return _asciitransform(str_obj, lowertable, NULL);
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   209
}
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   210
33757
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
   211
PyObject *asciiupper(PyObject *self, PyObject *args)
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   212
{
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   213
	PyObject *str_obj;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   214
	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) {
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   215
		return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   216
	}
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   217
	return _asciitransform(str_obj, uppertable, NULL);
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   218
}
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   219
33757
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33756
diff changeset
   220
PyObject *make_file_foldmap(PyObject *self, PyObject *args)
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   221
{
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   222
	PyObject *dmap, *spec_obj, *normcase_fallback;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   223
	PyObject *file_foldmap = NULL;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   224
	enum normcase_spec spec;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   225
	PyObject *k, *v;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   226
	dirstateTupleObject *tuple;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   227
	Py_ssize_t pos = 0;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   228
	const char *table;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   229
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   230
	if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", &PyDict_Type,
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   231
	                      &dmap, &PyInt_Type, &spec_obj, &PyFunction_Type,
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   232
	                      &normcase_fallback)) {
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   233
		goto quit;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   234
	}
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   235
24622
1e05f11619bb parsers.c: avoid implicit conversion loses integer precision warning
André Sintzoff <andre.sintzoff@gmail.com>
parents: 24609
diff changeset
   236
	spec = (int)PyInt_AS_LONG(spec_obj);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   237
	switch (spec) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   238
	case NORMCASE_LOWER:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   239
		table = lowertable;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   240
		break;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   241
	case NORMCASE_UPPER:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   242
		table = uppertable;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   243
		break;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   244
	case NORMCASE_OTHER:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   245
		table = NULL;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   246
		break;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   247
	default:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   248
		PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   249
		goto quit;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   250
	}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   251
25583
ce64c9ab19f2 parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents: 25582
diff changeset
   252
	/* Add some more entries to deal with additions outside this
ce64c9ab19f2 parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents: 25582
diff changeset
   253
	   function. */
ce64c9ab19f2 parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents: 25582
diff changeset
   254
	file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   255
	if (file_foldmap == NULL) {
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   256
		goto quit;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   257
	}
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   258
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   259
	while (PyDict_Next(dmap, &pos, &k, &v)) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   260
		if (!dirstate_tuple_check(v)) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   261
			PyErr_SetString(PyExc_TypeError,
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   262
			                "expected a dirstate tuple");
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   263
			goto quit;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   264
		}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   265
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   266
		tuple = (dirstateTupleObject *)v;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   267
		if (tuple->state != 'r') {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   268
			PyObject *normed;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   269
			if (table != NULL) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   270
				normed = _asciitransform(k, table,
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   271
				                         normcase_fallback);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   272
			} else {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   273
				normed = PyObject_CallFunctionObjArgs(
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   274
				    normcase_fallback, k, NULL);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   275
			}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   276
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   277
			if (normed == NULL) {
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   278
				goto quit;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   279
			}
26049
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   280
			if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   281
				Py_DECREF(normed);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   282
				goto quit;
26049
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   283
			}
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   284
			Py_DECREF(normed);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   285
		}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   286
	}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   287
	return file_foldmap;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   288
quit:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   289
	Py_XDECREF(file_foldmap);
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   290
	return NULL;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   291
}
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   292
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   293
/* calculate length of JSON-escaped string; returns -1 if unsupported */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   294
static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   295
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   296
	Py_ssize_t i, esclen = 0;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   297
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   298
	if (paranoid) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   299
		/* don't want to process multi-byte escapes in C */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   300
		for (i = 0; i < len; i++) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   301
			char c = buf[i];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   302
			if (c & 0x80) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   303
				PyErr_SetString(PyExc_ValueError,
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   304
				                "cannot process non-ascii str");
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   305
				return -1;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   306
			}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   307
			esclen += jsonparanoidlentable[(unsigned char)c];
34030
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   308
			if (esclen < 0) {
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   309
				PyErr_SetString(PyExc_MemoryError,
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   310
				                "overflow in jsonescapelen");
34030
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   311
				return -1;
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   312
			}
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   313
		}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   314
	} else {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   315
		for (i = 0; i < len; i++) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   316
			char c = buf[i];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   317
			esclen += jsonlentable[(unsigned char)c];
34030
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   318
			if (esclen < 0) {
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   319
				PyErr_SetString(PyExc_MemoryError,
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   320
				                "overflow in jsonescapelen");
34030
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   321
				return -1;
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
   322
			}
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   323
		}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   324
	}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   325
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   326
	return esclen;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   327
}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   328
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   329
/* map '\<c>' escape character */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   330
static char jsonescapechar2(char c)
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   331
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   332
	switch (c) {
34439
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   333
	case '\b':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   334
		return 'b';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   335
	case '\t':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   336
		return 't';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   337
	case '\n':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   338
		return 'n';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   339
	case '\f':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   340
		return 'f';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   341
	case '\r':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   342
		return 'r';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   343
	case '"':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   344
		return '"';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   345
	case '\\':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34030
diff changeset
   346
		return '\\';
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   347
	}
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   348
	return '\0'; /* should not happen */
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   349
}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   350
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   351
/* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   352
   include characters mappable by json(paranoid)lentable */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   353
static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   354
                             const char *origbuf, Py_ssize_t origlen,
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   355
                             bool paranoid)
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   356
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   357
	const uint8_t *lentable =
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   358
	    (paranoid) ? jsonparanoidlentable : jsonlentable;
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   359
	Py_ssize_t i, j;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   360
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   361
	for (i = 0, j = 0; i < origlen; i++) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   362
		char c = origbuf[i];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   363
		uint8_t l = lentable[(unsigned char)c];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   364
		assert(j + l <= esclen);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   365
		switch (l) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   366
		case 1:
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   367
			escbuf[j] = c;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   368
			break;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   369
		case 2:
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   370
			escbuf[j] = '\\';
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   371
			escbuf[j + 1] = jsonescapechar2(c);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   372
			break;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   373
		case 6:
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   374
			memcpy(escbuf + j, "\\u00", 4);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   375
			escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   376
			escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   377
			break;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   378
		}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   379
		j += l;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   380
	}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   381
}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   382
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   383
PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   384
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   385
	PyObject *origstr, *escstr;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   386
	const char *origbuf;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   387
	Py_ssize_t origlen, esclen;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   388
	int paranoid;
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   389
	if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", &PyBytes_Type,
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   390
	                      &origstr, &paranoid)) {
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   391
		return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   392
	}
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   393
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   394
	origbuf = PyBytes_AS_STRING(origstr);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   395
	origlen = PyBytes_GET_SIZE(origstr);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   396
	esclen = jsonescapelen(origbuf, origlen, paranoid);
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   397
	if (esclen < 0) {
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   398
		return NULL; /* unsupported char found or overflow */
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   399
	}
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   400
	if (origlen == esclen) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   401
		Py_INCREF(origstr);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   402
		return origstr;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   403
	}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   404
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   405
	escstr = PyBytes_FromStringAndSize(NULL, esclen);
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   406
	if (!escstr) {
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   407
		return NULL;
41336
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36620
diff changeset
   408
	}
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   409
	encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
36225
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36058
diff changeset
   410
	                 paranoid);
33925
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   411
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   412
	return escstr;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33812
diff changeset
   413
}