mercurial/cext/charencode.c
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 28 Feb 2023 19:01:20 +0100
branchstable
changeset 49695 a3b1ab5f5dee
parent 48912 b0dd39b91e7a
permissions -rw-r--r--
dirstate: deal with read-race for pure python code If we cannot read the dirstate data, this is probably because a writing process wrote it under our feet. So refresh the docket and try again a handful of time.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     1
/*
33778
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33777
diff changeset
     2
 charencode.c - miscellaneous character encoding
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     3
46819
d4ba4d51f85f contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents: 41361
diff changeset
     4
 Copyright 2008 Olivia Mackall <olivia@selenic.com> and others
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     5
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     6
 This software may be used and distributed according to the terms of
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     7
 the GNU General Public License, incorporated herein by reference.
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     8
*/
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
     9
33780
a22339d389d4 cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents: 33779
diff changeset
    10
#define PY_SSIZE_T_CLEAN
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
    11
#include <Python.h>
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    12
#include <assert.h>
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
    13
33779
0f4ac3b6dee4 cext: factor out header for charencode.c
Yuya Nishihara <yuya@tcha.org>
parents: 33778
diff changeset
    14
#include "charencode.h"
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
    15
#include "compat.h"
11361
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
    16
#include "util.h"
20742
3681de20b0a7 parsers: fail fast if Python has wrong minor version (issue4110)
Chris Jerdonek <chris.jerdonek@gmail.com>
parents: 20555
diff changeset
    17
34860
d0912876d7a7 parsers: protect some case-folding tables from clang-format
Augie Fackler <raf@durin42.com>
parents: 34440
diff changeset
    18
/* clang-format off */
32424
7640584e697c cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents: 32422
diff changeset
    19
static const char lowertable[128] = {
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    20
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    21
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    22
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    23
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    24
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    25
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    26
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    27
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    28
	'\x40',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    29
	        '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67', /* A-G */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    30
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f', /* H-O */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    31
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77', /* P-W */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    32
	'\x78', '\x79', '\x7a',                                         /* X-Z */
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    33
	                        '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    34
	'\x60', '\x61', '\x62', '\x63', '\x64', '\x65', '\x66', '\x67',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    35
	'\x68', '\x69', '\x6a', '\x6b', '\x6c', '\x6d', '\x6e', '\x6f',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    36
	'\x70', '\x71', '\x72', '\x73', '\x74', '\x75', '\x76', '\x77',
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    37
	'\x78', '\x79', '\x7a', '\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    38
};
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
    39
32424
7640584e697c cext: mark constant variables
Yuya Nishihara <yuya@tcha.org>
parents: 32422
diff changeset
    40
static const char uppertable[128] = {
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    41
	'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    42
	'\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    43
	'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    44
	'\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    45
	'\x20', '\x21', '\x22', '\x23', '\x24', '\x25', '\x26', '\x27',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    46
	'\x28', '\x29', '\x2a', '\x2b', '\x2c', '\x2d', '\x2e', '\x2f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    47
	'\x30', '\x31', '\x32', '\x33', '\x34', '\x35', '\x36', '\x37',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    48
	'\x38', '\x39', '\x3a', '\x3b', '\x3c', '\x3d', '\x3e', '\x3f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    49
	'\x40', '\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    50
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    51
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    52
	'\x58', '\x59', '\x5a', '\x5b', '\x5c', '\x5d', '\x5e', '\x5f',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    53
	'\x60',
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    54
		'\x41', '\x42', '\x43', '\x44', '\x45', '\x46', '\x47', /* a-g */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    55
	'\x48', '\x49', '\x4a', '\x4b', '\x4c', '\x4d', '\x4e', '\x4f', /* h-o */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    56
	'\x50', '\x51', '\x52', '\x53', '\x54', '\x55', '\x56', '\x57', /* p-w */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    57
	'\x58', '\x59', '\x5a', 					/* x-z */
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    58
				'\x7b', '\x7c', '\x7d', '\x7e', '\x7f'
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    59
};
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
    60
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    61
/* 1: no escape, 2: \<c>, 6: \u<x> */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    62
static const uint8_t jsonlentable[256] = {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    63
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    64
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    65
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    66
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    67
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    68
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    69
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    70
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    71
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    72
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    73
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    74
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    75
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    76
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    77
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    78
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    79
};
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    80
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    81
static const uint8_t jsonparanoidlentable[128] = {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    82
	6, 6, 6, 6, 6, 6, 6, 6, 2, 2, 2, 6, 2, 2, 6, 6, /* b, t, n, f, r */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    83
	6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    84
	1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* " */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    85
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 1, 6, 1, /* <, > */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    86
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    87
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, /* \\ */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    88
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    89
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, /* DEL */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    90
};
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    91
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    92
static const char hexchartable[16] = {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    93
	'0', '1', '2', '3', '4', '5', '6', '7',
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    94
	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f',
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    95
};
36096
81199632fa42 charencode: adjust clang-format enable/disable comments
Augie Fackler <augie@google.com>
parents: 34860
diff changeset
    96
/* clang-format on */
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
    97
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
    98
/*
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
    99
 * Turn a hex-encoded string into binary.
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   100
 */
33780
a22339d389d4 cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents: 33779
diff changeset
   101
PyObject *unhexlify(const char *str, Py_ssize_t len)
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   102
{
7092
fb3fc27617a2 parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents: 7091
diff changeset
   103
	PyObject *ret;
6395
3f0294536b24 fix const annotation warning
Benoit Boissinot <benoit.boissinot@ens-lyon.org>
parents: 6389
diff changeset
   104
	char *d;
33780
a22339d389d4 cext: modernize charencode.c to use Py_ssize_t
Yuya Nishihara <yuya@tcha.org>
parents: 33779
diff changeset
   105
	Py_ssize_t i;
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   106
11361
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   107
	ret = PyBytes_FromStringAndSize(NULL, len / 2);
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   108
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   109
	if (!ret) {
7092
fb3fc27617a2 parsers: speed up hex decoding for manifests
Matt Mackall <mpm@selenic.com>
parents: 7091
diff changeset
   110
		return NULL;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   111
	}
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   112
11361
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   113
	d = PyBytes_AsString(ret);
3de3d670d2b6 parsers.c: Added support for py3k.
Renato Cunha <renatoc@gmail.com>
parents: 10449
diff changeset
   114
16617
4fb16743049d parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents: 16616
diff changeset
   115
	for (i = 0; i < len;) {
4fb16743049d parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents: 16616
diff changeset
   116
		int hi = hexdigit(str, i++);
4fb16743049d parsers: change the type signature of hexdigit
Bryan O'Sullivan <bryano@fb.com>
parents: 16616
diff changeset
   117
		int lo = hexdigit(str, i++);
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   118
		*d++ = (hi << 4) | lo;
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   119
	}
7091
12b35ae03365 parsers: clean up whitespace
Matt Mackall <mpm@selenic.com>
parents: 6395
diff changeset
   120
6389
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   121
	return ret;
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   122
}
0231f763ebc8 manifest: improve parsing performance by 8x via a new C extension
Bryan O'Sullivan <bos@serpentine.com>
parents:
diff changeset
   123
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   124
PyObject *isasciistr(PyObject *self, PyObject *args)
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   125
{
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   126
	const char *buf;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   127
	Py_ssize_t i, len;
48912
b0dd39b91e7a cext: remove PY23()
Gregory Szorc <gregory.szorc@gmail.com>
parents: 48902
diff changeset
   128
	if (!PyArg_ParseTuple(args, "y#:isasciistr", &buf, &len)) {
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   129
		return NULL;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   130
	}
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   131
	i = 0;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   132
	/* char array in PyStringObject should be at least 4-byte aligned */
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   133
	if (((uintptr_t)buf & 3) == 0) {
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   134
		const uint32_t *p = (const uint32_t *)buf;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   135
		for (; i < len / 4; i++) {
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   136
			if (p[i] & 0x80808080U) {
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   137
				Py_RETURN_FALSE;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   138
			}
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   139
		}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   140
		i *= 4;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   141
	}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   142
	for (; i < len; i++) {
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   143
		if (buf[i] & 0x80) {
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   144
			Py_RETURN_FALSE;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   145
		}
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   146
	}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   147
	Py_RETURN_TRUE;
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   148
}
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents: 33943
diff changeset
   149
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   150
static inline PyObject *
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   151
_asciitransform(PyObject *str_obj, const char table[128], PyObject *fallback_fn)
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   152
{
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   153
	char *str, *newstr;
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   154
	Py_ssize_t i, len;
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   155
	PyObject *newobj = NULL;
24575
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   156
	PyObject *ret = NULL;
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   157
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   158
	str = PyBytes_AS_STRING(str_obj);
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   159
	len = PyBytes_GET_SIZE(str_obj);
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   160
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   161
	newobj = PyBytes_FromStringAndSize(NULL, len);
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   162
	if (!newobj) {
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   163
		goto quit;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   164
	}
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   165
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   166
	newstr = PyBytes_AS_STRING(newobj);
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   167
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   168
	for (i = 0; i < len; i++) {
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   169
		char c = str[i];
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   170
		if (c & 0x80) {
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   171
			if (fallback_fn != NULL) {
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   172
				ret = PyObject_CallFunctionObjArgs(
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   173
				    fallback_fn, str_obj, NULL);
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   174
			} else {
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   175
				PyObject *err = PyUnicodeDecodeError_Create(
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   176
				    "ascii", str, len, i, (i + 1),
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   177
				    "unexpected code byte");
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   178
				PyErr_SetObject(PyExc_UnicodeDecodeError, err);
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   179
				Py_XDECREF(err);
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   180
			}
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   181
			goto quit;
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   182
		}
24576
fe173106e7fe parsers: make _asciilower a generic _asciitransform function
Siddharth Agarwal <sid0@fb.com>
parents: 24575
diff changeset
   183
		newstr[i] = table[(unsigned char)c];
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   184
	}
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   185
24575
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   186
	ret = newobj;
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   187
	Py_INCREF(ret);
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   188
quit:
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   189
	Py_XDECREF(newobj);
24575
a62e957413f7 parsers._asciilower: use an explicit return object
Siddharth Agarwal <sid0@fb.com>
parents: 24574
diff changeset
   190
	return ret;
22778
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   191
}
80f2b63dd83a parsers: add a function to efficiently lowercase ASCII strings
Siddharth Agarwal <sid0@fb.com>
parents: 22604
diff changeset
   192
33778
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33777
diff changeset
   193
PyObject *asciilower(PyObject *self, PyObject *args)
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   194
{
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   195
	PyObject *str_obj;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   196
	if (!PyArg_ParseTuple(args, "O!:asciilower", &PyBytes_Type, &str_obj)) {
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   197
		return NULL;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   198
	}
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   199
	return _asciitransform(str_obj, lowertable, NULL);
24574
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   200
}
e97a00bf18ae parsers: factor out most of asciilower into an internal function
Siddharth Agarwal <sid0@fb.com>
parents: 24499
diff changeset
   201
33778
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33777
diff changeset
   202
PyObject *asciiupper(PyObject *self, PyObject *args)
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   203
{
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   204
	PyObject *str_obj;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   205
	if (!PyArg_ParseTuple(args, "O!:asciiupper", &PyBytes_Type, &str_obj)) {
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   206
		return NULL;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   207
	}
24606
e4a733c34bc6 parsers._asciitransform: also accept a fallback function
Siddharth Agarwal <sid0@fb.com>
parents: 24577
diff changeset
   208
	return _asciitransform(str_obj, uppertable, NULL);
24577
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   209
}
bf55df007535 parsers: introduce an asciiupper function
Siddharth Agarwal <sid0@fb.com>
parents: 24576
diff changeset
   210
33778
e9996bd7203f cext: split character encoding functions to new compilation unit
Yuya Nishihara <yuya@tcha.org>
parents: 33777
diff changeset
   211
PyObject *make_file_foldmap(PyObject *self, PyObject *args)
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   212
{
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   213
	PyObject *dmap, *spec_obj, *normcase_fallback;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   214
	PyObject *file_foldmap = NULL;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   215
	enum normcase_spec spec;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   216
	PyObject *k, *v;
47539
84391ddf4c78 dirstate-item: rename the class to DirstateItem
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   217
	dirstateItemObject *tuple;
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   218
	Py_ssize_t pos = 0;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   219
	const char *table;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   220
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   221
	if (!PyArg_ParseTuple(args, "O!O!O!:make_file_foldmap", &PyDict_Type,
48902
92c430e7e37a cext: drop preprocessor PyInt aliases
Gregory Szorc <gregory.szorc@gmail.com>
parents: 47976
diff changeset
   222
	                      &dmap, &PyLong_Type, &spec_obj, &PyFunction_Type,
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   223
	                      &normcase_fallback)) {
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   224
		goto quit;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   225
	}
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   226
48902
92c430e7e37a cext: drop preprocessor PyInt aliases
Gregory Szorc <gregory.szorc@gmail.com>
parents: 47976
diff changeset
   227
	spec = (int)PyLong_AS_LONG(spec_obj);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   228
	switch (spec) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   229
	case NORMCASE_LOWER:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   230
		table = lowertable;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   231
		break;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   232
	case NORMCASE_UPPER:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   233
		table = uppertable;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   234
		break;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   235
	case NORMCASE_OTHER:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   236
		table = NULL;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   237
		break;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   238
	default:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   239
		PyErr_SetString(PyExc_TypeError, "invalid normcasespec");
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   240
		goto quit;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   241
	}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   242
25583
ce64c9ab19f2 parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents: 25582
diff changeset
   243
	/* Add some more entries to deal with additions outside this
ce64c9ab19f2 parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents: 25582
diff changeset
   244
	   function. */
ce64c9ab19f2 parsers: factor out code to create a presized dict
Siddharth Agarwal <sid0@fb.com>
parents: 25582
diff changeset
   245
	file_foldmap = _dict_new_presized((PyDict_Size(dmap) / 10) * 11);
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   246
	if (file_foldmap == NULL) {
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   247
		goto quit;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   248
	}
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   249
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   250
	while (PyDict_Next(dmap, &pos, &k, &v)) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   251
		if (!dirstate_tuple_check(v)) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   252
			PyErr_SetString(PyExc_TypeError,
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   253
			                "expected a dirstate tuple");
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   254
			goto quit;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   255
		}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   256
47539
84391ddf4c78 dirstate-item: rename the class to DirstateItem
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 46819
diff changeset
   257
		tuple = (dirstateItemObject *)v;
47976
83f0e93ec34b dirstate-item: move the C implementation to the same logic
Pierre-Yves David <pierre-yves.david@octobus.net>
parents: 47539
diff changeset
   258
		if (tuple->flags | dirstate_flag_wc_tracked) {
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   259
			PyObject *normed;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   260
			if (table != NULL) {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   261
				normed = _asciitransform(k, table,
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   262
				                         normcase_fallback);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   263
			} else {
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   264
				normed = PyObject_CallFunctionObjArgs(
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   265
				    normcase_fallback, k, NULL);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   266
			}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   267
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   268
			if (normed == NULL) {
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   269
				goto quit;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   270
			}
26049
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   271
			if (PyDict_SetItem(file_foldmap, normed, k) == -1) {
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   272
				Py_DECREF(normed);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   273
				goto quit;
26049
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   274
			}
b1634b7804c7 parsers: correctly decref normed value after PyDict_SetItem
Augie Fackler <augie@google.com>
parents: 26048
diff changeset
   275
			Py_DECREF(normed);
24609
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   276
		}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   277
	}
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   278
	return file_foldmap;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   279
quit:
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   280
	Py_XDECREF(file_foldmap);
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   281
	return NULL;
670aaee7931c parsers: add a C function to create a file foldmap
Siddharth Agarwal <sid0@fb.com>
parents: 24606
diff changeset
   282
}
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   283
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   284
/* calculate length of JSON-escaped string; returns -1 if unsupported */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   285
static Py_ssize_t jsonescapelen(const char *buf, Py_ssize_t len, bool paranoid)
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   286
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   287
	Py_ssize_t i, esclen = 0;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   288
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   289
	if (paranoid) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   290
		/* don't want to process multi-byte escapes in C */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   291
		for (i = 0; i < len; i++) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   292
			char c = buf[i];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   293
			if (c & 0x80) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   294
				PyErr_SetString(PyExc_ValueError,
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   295
				                "cannot process non-ascii str");
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   296
				return -1;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   297
			}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   298
			esclen += jsonparanoidlentable[(unsigned char)c];
34047
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   299
			if (esclen < 0) {
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   300
				PyErr_SetString(PyExc_MemoryError,
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   301
				                "overflow in jsonescapelen");
34047
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   302
				return -1;
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   303
			}
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   304
		}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   305
	} else {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   306
		for (i = 0; i < len; i++) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   307
			char c = buf[i];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   308
			esclen += jsonlentable[(unsigned char)c];
34047
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   309
			if (esclen < 0) {
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   310
				PyErr_SetString(PyExc_MemoryError,
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   311
				                "overflow in jsonescapelen");
34047
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   312
				return -1;
e97be042fa1b encoding: check overflow while calculating size of JSON escape buffer
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
   313
			}
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   314
		}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   315
	}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   316
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   317
	return esclen;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   318
}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   319
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   320
/* map '\<c>' escape character */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   321
static char jsonescapechar2(char c)
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   322
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   323
	switch (c) {
34440
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   324
	case '\b':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   325
		return 'b';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   326
	case '\t':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   327
		return 't';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   328
	case '\n':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   329
		return 'n';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   330
	case '\f':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   331
		return 'f';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   332
	case '\r':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   333
		return 'r';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   334
	case '"':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   335
		return '"';
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   336
	case '\\':
88e83a618de0 cext: put case statements on separate line
Gregory Szorc <gregory.szorc@gmail.com>
parents: 34047
diff changeset
   337
		return '\\';
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   338
	}
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   339
	return '\0'; /* should not happen */
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   340
}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   341
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   342
/* convert 'origbuf' to JSON-escaped form 'escbuf'; 'origbuf' should only
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   343
   include characters mappable by json(paranoid)lentable */
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   344
static void encodejsonescape(char *escbuf, Py_ssize_t esclen,
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   345
                             const char *origbuf, Py_ssize_t origlen,
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   346
                             bool paranoid)
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   347
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   348
	const uint8_t *lentable =
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   349
	    (paranoid) ? jsonparanoidlentable : jsonlentable;
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   350
	Py_ssize_t i, j;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   351
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   352
	for (i = 0, j = 0; i < origlen; i++) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   353
		char c = origbuf[i];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   354
		uint8_t l = lentable[(unsigned char)c];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   355
		assert(j + l <= esclen);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   356
		switch (l) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   357
		case 1:
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   358
			escbuf[j] = c;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   359
			break;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   360
		case 2:
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   361
			escbuf[j] = '\\';
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   362
			escbuf[j + 1] = jsonescapechar2(c);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   363
			break;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   364
		case 6:
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   365
			memcpy(escbuf + j, "\\u00", 4);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   366
			escbuf[j + 4] = hexchartable[(unsigned char)c >> 4];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   367
			escbuf[j + 5] = hexchartable[(unsigned char)c & 0xf];
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   368
			break;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   369
		}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   370
		j += l;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   371
	}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   372
}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   373
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   374
PyObject *jsonescapeu8fast(PyObject *self, PyObject *args)
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   375
{
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   376
	PyObject *origstr, *escstr;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   377
	const char *origbuf;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   378
	Py_ssize_t origlen, esclen;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   379
	int paranoid;
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   380
	if (!PyArg_ParseTuple(args, "O!i:jsonescapeu8fast", &PyBytes_Type,
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   381
	                      &origstr, &paranoid)) {
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   382
		return NULL;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   383
	}
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   384
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   385
	origbuf = PyBytes_AS_STRING(origstr);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   386
	origlen = PyBytes_GET_SIZE(origstr);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   387
	esclen = jsonescapelen(origbuf, origlen, paranoid);
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   388
	if (esclen < 0) {
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   389
		return NULL; /* unsupported char found or overflow */
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   390
	}
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   391
	if (origlen == esclen) {
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   392
		Py_INCREF(origstr);
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   393
		return origstr;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   394
	}
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   395
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   396
	escstr = PyBytes_FromStringAndSize(NULL, esclen);
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   397
	if (!escstr) {
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   398
		return NULL;
41361
763b45bc4483 cleanup: use clang-tidy to add missing {} around one-line statements
Augie Fackler <augie@google.com>
parents: 36649
diff changeset
   399
	}
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   400
	encodejsonescape(PyBytes_AS_STRING(escstr), esclen, origbuf, origlen,
36263
6c87d4113a90 charencode: allow clang-format oversight
Augie Fackler <augie@google.com>
parents: 36096
diff changeset
   401
	                 paranoid);
33943
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   402
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   403
	return escstr;
2c37f9dabc32 encoding: add fast path of jsonescape() (issue5533)
Yuya Nishihara <yuya@tcha.org>
parents: 33833
diff changeset
   404
}