Mercurial > hg
annotate mercurial/pure/charencode.py @ 52277:bf86e0b561d3 stable
rust: update install instruction
As discussed last Friday after publishing the rc wheel, we need to update the
rust readme and highlight that change in the release announcement if we want
people using Rust to keep being able to use pip.
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 20 Nov 2024 06:32:35 +0100 |
parents | f4733654f144 |
children |
rev | line source |
---|---|
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
1 # charencode.py - miscellaneous character encoding |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
2 # |
46819
d4ba4d51f85f
contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents:
45942
diff
changeset
|
3 # Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
4 # |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
6 # GNU General Public License version 2 or any later version. |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
7 |
51863
f4733654f144
typing: add `from __future__ import annotations` to most files
Matt Harbison <matt_harbison@yahoo.com>
parents:
51722
diff
changeset
|
8 from __future__ import annotations |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
9 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
10 import array |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
11 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
12 from .. import pycompat |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
13 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
14 |
51722
43adbe03079b
typing: add type hints to the `charencode` module
Matt Harbison <matt_harbison@yahoo.com>
parents:
48907
diff
changeset
|
15 def isasciistr(s: bytes) -> bool: |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
16 try: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
17 s.decode('ascii') |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
18 return True |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
19 except UnicodeDecodeError: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
20 return False |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
21 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
22 |
51722
43adbe03079b
typing: add type hints to the `charencode` module
Matt Harbison <matt_harbison@yahoo.com>
parents:
48907
diff
changeset
|
23 def asciilower(s: bytes) -> bytes: |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
24 """convert a string to lowercase if ASCII |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
25 |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
26 Raises UnicodeDecodeError if non-ASCII characters are found.""" |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
27 s.decode('ascii') |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
28 return s.lower() |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
29 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
30 |
51722
43adbe03079b
typing: add type hints to the `charencode` module
Matt Harbison <matt_harbison@yahoo.com>
parents:
48907
diff
changeset
|
31 def asciiupper(s: bytes) -> bytes: |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
32 """convert a string to uppercase if ASCII |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
33 |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
34 Raises UnicodeDecodeError if non-ASCII characters are found.""" |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
35 s.decode('ascii') |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
36 return s.upper() |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
37 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
38 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
39 _jsonmap = [] |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
40 _jsonmap.extend(b"\\u%04x" % x for x in range(32)) |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
41 _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127)) |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
42 _jsonmap.append(b'\\u007f') |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
43 _jsonmap[0x09] = b'\\t' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
44 _jsonmap[0x0A] = b'\\n' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
45 _jsonmap[0x22] = b'\\"' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
46 _jsonmap[0x5C] = b'\\\\' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
47 _jsonmap[0x08] = b'\\b' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
48 _jsonmap[0x0C] = b'\\f' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
49 _jsonmap[0x0D] = b'\\r' |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
50 _paranoidjsonmap = _jsonmap[:] |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
51 _paranoidjsonmap[0x3C] = b'\\u003c' # '<' (e.g. escape "</script>") |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
52 _paranoidjsonmap[0x3E] = b'\\u003e' # '>' |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
53 _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256)) |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
54 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
55 |
51722
43adbe03079b
typing: add type hints to the `charencode` module
Matt Harbison <matt_harbison@yahoo.com>
parents:
48907
diff
changeset
|
56 def jsonescapeu8fast(u8chars: bytes, paranoid: bool) -> bytes: |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
57 """Convert a UTF-8 byte string to JSON-escaped form (fast path) |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
58 |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
59 Raises ValueError if non-ASCII characters have to be escaped. |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
60 """ |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
61 if paranoid: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
62 jm = _paranoidjsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
63 else: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
64 jm = _jsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
65 try: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
66 return b''.join(jm[x] for x in bytearray(u8chars)) |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
67 except IndexError: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
68 raise ValueError |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
69 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
70 |
48907
b677bccf74b9
charencode: remove Python 2 support code
Gregory Szorc <gregory.szorc@gmail.com>
parents:
48875
diff
changeset
|
71 _utf8strict = r'surrogatepass' |
34218
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
72 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
73 |
51722
43adbe03079b
typing: add type hints to the `charencode` module
Matt Harbison <matt_harbison@yahoo.com>
parents:
48907
diff
changeset
|
74 def jsonescapeu8fallback(u8chars: bytes, paranoid: bool) -> bytes: |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
75 """Convert a UTF-8 byte string to JSON-escaped form (slow path) |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
76 |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
77 Escapes all non-ASCII characters no matter if paranoid is False. |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
78 """ |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
79 if paranoid: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
80 jm = _paranoidjsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
81 else: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
82 jm = _jsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
83 # non-BMP char is represented as UTF-16 surrogate pair |
34218
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
84 u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict) |
43506
9f70512ae2cf
cleanup: remove pointless r-prefixes on single-quoted strings
Augie Fackler <augie@google.com>
parents:
43077
diff
changeset
|
85 u16codes = array.array('H', u16b) |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
86 u16codes.pop(0) # drop BOM |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
87 return b''.join(jm[x] if x < 128 else b'\\u%04x' % x for x in u16codes) |