mercurial/pure/charencode.py
author Yuya Nishihara <yuya@tcha.org>
Sun, 23 Apr 2017 16:10:51 +0900
changeset 33942 b9101467d88b
parent 33782 f5fc54e7e467
child 33944 f4433f2713d0
permissions -rw-r--r--
encoding: extract stub for fast JSON escape This moves JSON character maps to pure/charencode.py because they will be used only when the fast-path fails.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
33782
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     1
# charencode.py - miscellaneous character encoding
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     2
#
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     3
#  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     4
#
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     5
# This software may be used and distributed according to the terms of the
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     6
# GNU General Public License version 2 or any later version.
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     7
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     8
from __future__ import absolute_import
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     9
33942
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    10
import array
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    11
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    12
from .. import (
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    13
    pycompat,
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    14
)
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    15
33782
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    16
def asciilower(s):
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    17
    '''convert a string to lowercase if ASCII
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    18
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    19
    Raises UnicodeDecodeError if non-ASCII characters are found.'''
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    20
    s.decode('ascii')
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    21
    return s.lower()
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    22
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    23
def asciiupper(s):
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    24
    '''convert a string to uppercase if ASCII
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    25
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    26
    Raises UnicodeDecodeError if non-ASCII characters are found.'''
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    27
    s.decode('ascii')
f5fc54e7e467 encoding: drop circular import by proxying through '<policy>.charencode'
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    28
    return s.upper()
33942
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    29
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    30
_jsonmap = []
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    31
_jsonmap.extend("\\u%04x" % x for x in range(32))
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    32
_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    33
_jsonmap.append('\\u007f')
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    34
_jsonmap[0x09] = '\\t'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    35
_jsonmap[0x0a] = '\\n'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    36
_jsonmap[0x22] = '\\"'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    37
_jsonmap[0x5c] = '\\\\'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    38
_jsonmap[0x08] = '\\b'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    39
_jsonmap[0x0c] = '\\f'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    40
_jsonmap[0x0d] = '\\r'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    41
_paranoidjsonmap = _jsonmap[:]
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    42
_paranoidjsonmap[0x3c] = '\\u003c'  # '<' (e.g. escape "</script>")
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    43
_paranoidjsonmap[0x3e] = '\\u003e'  # '>'
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    44
_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    45
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    46
def jsonescapeu8fast(u8chars, paranoid):
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    47
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    48
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    49
    Raises ValueError if non-ASCII characters have to be escaped.
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    50
    """
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    51
    if paranoid:
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    52
        jm = _paranoidjsonmap
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    53
    else:
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    54
        jm = _jsonmap
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    55
    try:
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    56
        return ''.join(jm[x] for x in bytearray(u8chars))
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    57
    except IndexError:
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    58
        raise ValueError
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    59
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    60
def jsonescapeu8fallback(u8chars, paranoid):
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    61
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    62
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    63
    Escapes all non-ASCII characters no matter if paranoid is False.
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    64
    """
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    65
    if paranoid:
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    66
        jm = _paranoidjsonmap
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    67
    else:
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    68
        jm = _jsonmap
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    69
    # non-BMP char is represented as UTF-16 surrogate pair
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    70
    u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    71
    u16codes.pop(0)  # drop BOM
b9101467d88b encoding: extract stub for fast JSON escape
Yuya Nishihara <yuya@tcha.org>
parents: 33782
diff changeset
    72
    return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)