Mercurial > hg
annotate mercurial/pure/charencode.py @ 36297:a59ff82154b8
httppeer: headers are native strings
# skip-blame just marking some native strings
Differential Revision: https://phab.mercurial-scm.org/D2313
author | Augie Fackler <augie@google.com> |
---|---|
date | Sun, 18 Feb 2018 00:03:39 -0500 |
parents | aa877860d4d7 |
children | 2372284d9457 |
rev | line source |
---|---|
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
1 # charencode.py - miscellaneous character encoding |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
2 # |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
4 # |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
6 # GNU General Public License version 2 or any later version. |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
7 |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
8 from __future__ import absolute_import |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
9 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
10 import array |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
11 |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
12 from .. import ( |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
13 pycompat, |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
14 ) |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
15 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
def isasciistr(s):
    """Tell whether ``s`` can be decoded as ASCII

    Returns True when ``s.decode('ascii')`` succeeds and False when it
    raises UnicodeDecodeError. The decoded value itself is discarded.
    """
    try:
        s.decode('ascii')
    except UnicodeDecodeError:
        return False
    else:
        return True
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
22 |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
def asciilower(s):
    '''lowercase a string after checking that it is pure ASCII

    The ASCII decode is performed only as a validation step; its result
    is thrown away. UnicodeDecodeError propagates to the caller if
    non-ASCII characters are found.'''
    s.decode('ascii')  # validation only
    lowered = s.lower()
    return lowered
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
29 |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
def asciiupper(s):
    '''uppercase a string after checking that it is pure ASCII

    The ASCII decode is performed only as a validation step; its result
    is thrown away. UnicodeDecodeError propagates to the caller if
    non-ASCII characters are found.'''
    s.decode('ascii')  # validation only
    uppered = s.upper()
    return uppered
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
36 |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
def _buildjsonmaps():
    """Build the lookup tables used for JSON escaping

    Returns a ``(regular, paranoid)`` pair of lists indexed by byte value.
    The regular table ends up with 256 entries. The paranoid table is
    copied from it *before* values 128-255 are appended, so it holds only
    128 entries, and it additionally escapes '<' and '>'.
    """
    # control characters start out with the generic 4-hex-digit escape
    table = ["\\u%04x" % code for code in range(32)]
    # values 32-126 map to whatever pycompat.bytechr yields for them
    # (presumably the single byte itself)
    table.extend(pycompat.bytechr(code) for code in range(32, 127))
    table.append('\\u007f')
    # dedicated short escapes override the defaults set above
    for code, escaped in (
        (0x08, '\\b'),
        (0x09, '\\t'),
        (0x0a, '\\n'),
        (0x0c, '\\f'),
        (0x0d, '\\r'),
        (0x22, '\\"'),
        (0x5c, '\\\\'),
    ):
        table[code] = escaped
    # the snapshot must be taken here, while the table is still 128 long
    paranoid = table[:]
    paranoid[0x3c] = '\\u003c'  # '<' (e.g. escape "</script>")
    paranoid[0x3e] = '\\u003e'  # '>'
    # only the regular table gets entries for values 128-255
    table.extend(pycompat.bytechr(code) for code in range(128, 256))
    return table, paranoid

_jsonmap, _paranoidjsonmap = _buildjsonmaps()
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
52 |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
def jsonescapeu8fast(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Each byte of ``u8chars`` is replaced by its entry in _paranoidjsonmap
    (when ``paranoid`` is true) or _jsonmap (otherwise), and the pieces
    are joined together.

    Raises ValueError if non-ASCII characters have to be escaped; this is
    detected as an IndexError from a byte value that has no entry in the
    selected table.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap
    try:
        return ''.join(table[byte] for byte in bytearray(u8chars))
    except IndexError:
        raise ValueError
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
66 |
34218
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
# codec error handler used by jsonescapeu8fallback for both its UTF-8
# decode and its UTF-16 encode: 'surrogatepass' on Python 3, 'strict'
# otherwise
_utf8strict = r'surrogatepass' if pycompat.ispy3 else r'strict'
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
71 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
def jsonescapeu8fallback(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    The input is re-encoded as UTF-16 and processed one 16-bit code unit
    at a time. Units below 128 are looked up in _paranoidjsonmap (when
    ``paranoid`` is true) or _jsonmap (otherwise); every other unit is
    written as a backslash-u escape with four hex digits.

    Escapes all non-ASCII characters no matter if paranoid is False.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap
    # non-BMP char is represented as UTF-16 surrogate pair
    text = u8chars.decode('utf-8', _utf8strict)
    packed = text.encode('utf-16', _utf8strict)
    units = array.array(r'H', packed)
    del units[0]  # drop BOM
    pieces = []
    for unit in units:
        if unit < 128:
            pieces.append(table[unit])
        else:
            pieces.append('\\u%04x' % unit)
    return ''.join(pieces)