tests/test-encoding-func.py
author Pierre-Yves David <pierre-yves.david@octobus.net>
Tue, 26 Sep 2023 02:49:18 +0200
changeset 51030 9011c38b4f65
parent 48966 6000f5b25c9b
permissions -rw-r--r--
revlog: use a `reading` context in `_enforceinlinesize` We are about to enforce reading context on various operation, so we make sure top level method are in the right context. In the future we might move the responsibility of opening the revlog for reading higher in the call chain but lets limit the disruption for now.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     1
import unittest
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     2
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37991
diff changeset
     3
from mercurial import encoding
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37991
diff changeset
     4
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     5
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     6
class IsasciistrTest(unittest.TestCase):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     7
    asciistrs = [
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     8
        b'a',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     9
        b'ab',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    10
        b'abc',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    11
        b'abcd',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    12
        b'abcde',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    13
        b'abcdefghi',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    14
        b'abcd\0fghi',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    15
    ]
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    16
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    17
    def testascii(self):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    18
        for s in self.asciistrs:
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    19
            self.assertTrue(encoding.isasciistr(s))
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    20
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    21
    def testnonasciichar(self):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    22
        for s in self.asciistrs:
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    23
            for i in range(len(s)):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    24
                t = bytearray(s)
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    25
                t[i] |= 0x80
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    26
                self.assertFalse(encoding.isasciistr(bytes(t)))
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    27
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37991
diff changeset
    28
33945
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
    29
class LocalEncodingTest(unittest.TestCase):
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
    30
    def testasciifastpath(self):
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
    31
        s = b'\0' * 100
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
    32
        self.assertTrue(s is encoding.tolocal(s))
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
    33
        self.assertTrue(s is encoding.fromlocal(s))
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33944
diff changeset
    34
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37991
diff changeset
    35
33946
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33945
diff changeset
    36
class Utf8bEncodingTest(unittest.TestCase):
37990
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    37
    def setUp(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    38
        self.origencoding = encoding.encoding
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    39
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    40
    def tearDown(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    41
        encoding.encoding = self.origencoding
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    42
33946
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33945
diff changeset
    43
    def testasciifastpath(self):
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33945
diff changeset
    44
        s = b'\0' * 100
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33945
diff changeset
    45
        self.assertTrue(s is encoding.toutf8b(s))
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33945
diff changeset
    46
        self.assertTrue(s is encoding.fromutf8b(s))
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33945
diff changeset
    47
37990
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    48
    def testlossylatin(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    49
        encoding.encoding = b'ascii'
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    50
        s = u'\xc0'.encode('utf-8')
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    51
        l = encoding.tolocal(s)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    52
        self.assertEqual(l, b'?')  # lossy
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    53
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    54
37991
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    55
    def testlosslesslatin(self):
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    56
        encoding.encoding = b'latin-1'
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    57
        s = u'\xc0'.encode('utf-8')
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    58
        l = encoding.tolocal(s)
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    59
        self.assertEqual(l, b'\xc0')  # lossless
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    60
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    61
37990
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    62
    def testlossy0xed(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    63
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    64
        s = u'\ud1bc\xc0'.encode('utf-8')
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    65
        l = encoding.tolocal(s)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    66
        self.assertIn(b'\xed', l)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    67
        self.assertTrue(l.endswith(b'?'))  # lossy
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    68
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33946
diff changeset
    69
37991
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    70
    def testlossless0xed(self):
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    71
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    72
        s = u'\ud1bc'.encode('utf-8')
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    73
        l = encoding.tolocal(s)
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    74
        self.assertEqual(l, b'\xc5\xed')  # lossless
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    75
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37990
diff changeset
    76
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37991
diff changeset
    77
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    78
if __name__ == '__main__':
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    79
    import silenttestrunner
43076
2372284d9457 formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents: 37991
diff changeset
    80
33944
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    81
    silenttestrunner.main(__name__)