Mercurial > hg
annotate mercurial/pure/charencode.py @ 47421:d2e0226b511a
transaction: narrow the error filtering when failing to rename undo file
Having inconsistent "undo" files can be quite serious so we narrow the error
filtering to the intent that the comment explain.
(This is an opportunity improvement while looking at something next to that.)
Differential Revision: https://phab.mercurial-scm.org/D10845
author | Pierre-Yves David <pierre-yves.david@octobus.net> |
---|---|
date | Wed, 09 Jun 2021 00:59:04 +0200 |
parents | d4ba4d51f85f |
children | 6000f5b25c9b |
rev | line source |
---|---|
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
1 # charencode.py - miscellaneous character encoding |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
2 # |
46819
d4ba4d51f85f
contributor: change mentions of mpm to olivia
Raphaël Gomès <rgomes@octobus.net>
parents:
45942
diff
changeset
|
3 # Copyright 2005-2009 Olivia Mackall <olivia@selenic.com> and others |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
4 # |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
5 # This software may be used and distributed according to the terms of the |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
6 # GNU General Public License version 2 or any later version. |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
7 |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
8 from __future__ import absolute_import |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
9 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
10 import array |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
11 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
12 from .. import pycompat |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
13 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
14 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
15 def isasciistr(s): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
16 try: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
17 s.decode('ascii') |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
18 return True |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
19 except UnicodeDecodeError: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
20 return False |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
33924
diff
changeset
|
21 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
22 |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
23 def asciilower(s): |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
24 """convert a string to lowercase if ASCII |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
25 |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
26 Raises UnicodeDecodeError if non-ASCII characters are found.""" |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
27 s.decode('ascii') |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
28 return s.lower() |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
29 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
30 |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
31 def asciiupper(s): |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
32 """convert a string to uppercase if ASCII |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
33 |
45942
89a2afe31e82
formating: upgrade to black 20.8b1
Augie Fackler <raf@durin42.com>
parents:
43506
diff
changeset
|
34 Raises UnicodeDecodeError if non-ASCII characters are found.""" |
33761
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
35 s.decode('ascii') |
f5fc54e7e467
encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
36 return s.upper() |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
37 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
38 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
39 _jsonmap = [] |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
40 _jsonmap.extend(b"\\u%04x" % x for x in range(32)) |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
41 _jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127)) |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
42 _jsonmap.append(b'\\u007f') |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
43 _jsonmap[0x09] = b'\\t' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
44 _jsonmap[0x0A] = b'\\n' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
45 _jsonmap[0x22] = b'\\"' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
46 _jsonmap[0x5C] = b'\\\\' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
47 _jsonmap[0x08] = b'\\b' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
48 _jsonmap[0x0C] = b'\\f' |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
49 _jsonmap[0x0D] = b'\\r' |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
50 _paranoidjsonmap = _jsonmap[:] |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
51 _paranoidjsonmap[0x3C] = b'\\u003c' # '<' (e.g. escape "</script>") |
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
52 _paranoidjsonmap[0x3E] = b'\\u003e' # '>' |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
53 _jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256)) |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
54 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
55 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
56 def jsonescapeu8fast(u8chars, paranoid): |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
57 """Convert a UTF-8 byte string to JSON-escaped form (fast path) |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
58 |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
59 Raises ValueError if non-ASCII characters have to be escaped. |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
60 """ |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
61 if paranoid: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
62 jm = _paranoidjsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
63 else: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
64 jm = _jsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
65 try: |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
66 return b''.join(jm[x] for x in bytearray(u8chars)) |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
67 except IndexError: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
68 raise ValueError |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
69 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
70 |
34218
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
71 if pycompat.ispy3: |
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
72 _utf8strict = r'surrogatepass' |
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
73 else: |
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
74 _utf8strict = r'strict' |
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
75 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
34218
diff
changeset
|
76 |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
77 def jsonescapeu8fallback(u8chars, paranoid): |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
78 """Convert a UTF-8 byte string to JSON-escaped form (slow path) |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
79 |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
80 Escapes all non-ASCII characters no matter if paranoid is False. |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
81 """ |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
82 if paranoid: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
83 jm = _paranoidjsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
84 else: |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
85 jm = _jsonmap |
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
86 # non-BMP char is represented as UTF-16 surrogate pair |
34218
aa877860d4d7
py3: use 'surrogatepass' error handler to process U+DCxx transparently
Yuya Nishihara <yuya@tcha.org>
parents:
34217
diff
changeset
|
87 u16b = u8chars.decode('utf-8', _utf8strict).encode('utf-16', _utf8strict) |
43506
9f70512ae2cf
cleanup: remove pointless r-prefixes on single-quoted strings
Augie Fackler <augie@google.com>
parents:
43077
diff
changeset
|
88 u16codes = array.array('H', u16b) |
33924
b9101467d88b
encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents:
33761
diff
changeset
|
89 u16codes.pop(0) # drop BOM |
43077
687b865b95ad
formatting: byteify all mercurial/ and hgext/ string literals
Augie Fackler <augie@google.com>
parents:
43076
diff
changeset
|
90 return b''.join(jm[x] if x < 128 else b'\\u%04x' % x for x in u16codes) |