mercurial/pure/charencode.py
author Kyle Lippincott <spectral@google.com>
Tue, 08 Jan 2019 17:52:39 -0800
changeset 41138 8ddc5d8bea25
parent 34218 aa877860d4d7
child 43076 2372284d9457
permissions -rw-r--r--
tests: support passing testcase after .t paths that have path separators

This probably could have been implemented by changing the regex above this bit of code, but I wasn't sure if it would end up handling various OSes correctly, so I decided to go with this version instead.

Previously:
  $ tests/run-tests.py tests/test-ssh.t -l
  running 2 tests using 2 parallel processes
  ..
  # Ran 2 tests, 0 skipped, 0 failed.
  $ tests/run-tests.py tests/test-ssh.t#sshv1 -l
  running 0 tests using 0 parallel processes

  # Ran 0 tests, 0 skipped, 0 failed.

Now:
  $ tests/run-tests.py tests/test-ssh.t -l
  running 2 tests using 2 parallel processes
  ..
  # Ran 2 tests, 0 skipped, 0 failed.
  $ tests/run-tests.py tests/test-ssh.t#sshv1 -l
  running 1 tests using 1 parallel processes
  .
  # Ran 1 tests, 0 skipped, 0 failed.

Differential Revision: https://phab.mercurial-scm.org/D5535

# charencode.py - miscellaneous character encoding
#
#  Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import array

from .. import (
    pycompat,
)

def isasciistr(s):
    '''report whether a byte string decodes cleanly as ASCII

    Returns True if s.decode('ascii') succeeds and False if it raises
    UnicodeDecodeError.'''
    try:
        # only the success or failure matters; the decoded text is discarded
        s.decode('ascii')
    except UnicodeDecodeError:
        return False
    return True

def asciilower(s):
    '''return the lowercase form of an ASCII-only byte string

    The input is first decoded as ASCII purely as a validity check, so a
    UnicodeDecodeError is raised if it contains non-ASCII characters.'''
    # validation only: the decoded value is thrown away
    s.decode('ascii')
    lowered = s.lower()
    return lowered

def asciiupper(s):
    '''return the uppercase form of an ASCII-only byte string

    The input is first decoded as ASCII purely as a validity check, so a
    UnicodeDecodeError is raised if it contains non-ASCII characters.'''
    # validation only: the decoded value is thrown away
    s.decode('ascii')
    uppered = s.upper()
    return uppered

# JSON escape tables, indexed by byte value.
#
# Build the 7-bit range first: 0x00-0x1f get the generic \uXXXX form,
# 0x20-0x7e map to pycompat.bytechr(x) (presumably the byte itself), and
# 0x7f (DEL) is escaped.
_jsonmap = []
_jsonmap += ("\\u%04x" % x for x in range(0x20))
_jsonmap += (pycompat.bytechr(x) for x in range(0x20, 0x7f))
_jsonmap += ['\\u007f']
# Override individual entries with the short escapes, plus '"' and '\'.
_jsonmap[0x08] = '\\b'
_jsonmap[0x09] = '\\t'
_jsonmap[0x0a] = '\\n'
_jsonmap[0x0c] = '\\f'
_jsonmap[0x0d] = '\\r'
_jsonmap[0x22] = '\\"'
_jsonmap[0x5c] = '\\\\'
# Copy while only 128 entries exist: the paranoid table never receives the
# high-byte entries appended to _jsonmap below, so indexing it with a value
# >= 128 raises IndexError.
_paranoidjsonmap = list(_jsonmap)
_paranoidjsonmap[0x3c] = '\\u003c'  # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3e] = '\\u003e'  # '>'
# Only the non-paranoid table gets entries for 0x80-0xff.
_jsonmap += (pycompat.bytechr(x) for x in range(0x80, 0x100))

def jsonescapeu8fast(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (fast path)

    Each byte of u8chars is looked up in a module-level escape table
    (_paranoidjsonmap if paranoid is true, otherwise _jsonmap) and the
    results are joined together.

    Raises ValueError if a byte has no entry in the selected table, i.e.
    if non-ASCII characters have to be escaped.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap
    try:
        # bytearray yields integer byte values usable as table indexes
        return ''.join([table[byte] for byte in bytearray(u8chars)])
    except IndexError:
        # byte value beyond the end of the selected table
        raise ValueError

# Codec error handler passed to the UTF-8 decode and UTF-16 encode calls
# in jsonescapeu8fallback(): 'surrogatepass' when pycompat.ispy3 is true,
# 'strict' otherwise.
_utf8strict = r'surrogatepass' if pycompat.ispy3 else r'strict'

def jsonescapeu8fallback(u8chars, paranoid):
    """Convert a UTF-8 byte string to JSON-escaped form (slow path)

    The input is decoded as UTF-8 and re-encoded as UTF-16. Code units
    below 128 are looked up in the escape table (_paranoidjsonmap if
    paranoid is true, otherwise _jsonmap); every other code unit is
    written as a \\uXXXX escape, so all non-ASCII characters are escaped
    no matter if paranoid is False.
    """
    table = _paranoidjsonmap if paranoid else _jsonmap
    text = u8chars.decode('utf-8', _utf8strict)
    # going through UTF-16 turns each non-BMP char into a surrogate pair
    encoded = text.encode('utf-16', _utf8strict)
    units = array.array(r'H', encoded)
    units.pop(0)  # the first code unit is the BOM, not data
    pieces = []
    for unit in units:
        if unit < 128:
            pieces.append(table[unit])
        else:
            pieces.append('\\u%04x' % unit)
    return ''.join(pieces)