tests/test-encoding-func.py
author spectral <spectral@google.com>
Mon, 06 Aug 2018 12:52:22 -0700
changeset 38954 5a7df82de142
parent 37947 3ea3c96ada54
child 43076 2372284d9457
permissions -rw-r--r--
includematcher: separate "parents" from "dirs"

A future patch will make use of this separation so that we can make more intelligent decisions about what to investigate/load when the matcher is in use. Currently, even with this patch, we typically use the 'visitdir' call to identify if we can skip some directory, something along the lines of:

    for f in all_items:
        if match.visitdir(f):
            <do stuff>

This can be slower than we'd like if there are a lot of items; it requires N calls to match.visitdir in the best case. Commonly, especially with 'narrow', we have a situation where we do some work for the directory, possibly just loading it from disk (when using treemanifests) and then check if we should be interacting with it at all, which can be a huge slowdown in some pathological cases.

Differential Revision: https://phab.mercurial-scm.org/D4129
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     1
from __future__ import absolute_import
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     2
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     3
import unittest
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     4
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     5
from mercurial import (
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     6
    encoding,
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     7
)
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     8
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
     9
class IsasciistrTest(unittest.TestCase):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    10
    asciistrs = [
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    11
        b'a',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    12
        b'ab',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    13
        b'abc',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    14
        b'abcd',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    15
        b'abcde',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    16
        b'abcdefghi',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    17
        b'abcd\0fghi',
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    18
    ]
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    19
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    20
    def testascii(self):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    21
        for s in self.asciistrs:
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    22
            self.assertTrue(encoding.isasciistr(s))
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    23
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    24
    def testnonasciichar(self):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    25
        for s in self.asciistrs:
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    26
            for i in range(len(s)):
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    27
                t = bytearray(s)
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    28
                t[i] |= 0x80
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    29
                self.assertFalse(encoding.isasciistr(bytes(t)))
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    30
33927
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    31
class LocalEncodingTest(unittest.TestCase):
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    32
    def testasciifastpath(self):
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    33
        s = b'\0' * 100
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    34
        self.assertTrue(s is encoding.tolocal(s))
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    35
        self.assertTrue(s is encoding.fromlocal(s))
853574db5b12 encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33926
diff changeset
    36
33928
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    37
class Utf8bEncodingTest(unittest.TestCase):
37946
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    38
    def setUp(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    39
        self.origencoding = encoding.encoding
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    40
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    41
    def tearDown(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    42
        encoding.encoding = self.origencoding
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    43
33928
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    44
    def testasciifastpath(self):
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    45
        s = b'\0' * 100
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    46
        self.assertTrue(s is encoding.toutf8b(s))
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    47
        self.assertTrue(s is encoding.fromutf8b(s))
6c119dbfd0c0 encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents: 33927
diff changeset
    48
37946
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    49
    def testlossylatin(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    50
        encoding.encoding = b'ascii'
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    51
        s = u'\xc0'.encode('utf-8')
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    52
        l = encoding.tolocal(s)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    53
        self.assertEqual(l, b'?')  # lossy
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    54
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    55
37947
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    56
    def testlosslesslatin(self):
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    57
        encoding.encoding = b'latin-1'
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    58
        s = u'\xc0'.encode('utf-8')
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    59
        l = encoding.tolocal(s)
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    60
        self.assertEqual(l, b'\xc0')  # lossless
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    61
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    62
37946
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    63
    def testlossy0xed(self):
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    64
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    65
        s = u'\ud1bc\xc0'.encode('utf-8')
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    66
        l = encoding.tolocal(s)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    67
        self.assertIn(b'\xed', l)
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    68
        self.assertTrue(l.endswith(b'?'))  # lossy
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    69
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved
57b0c7221dba encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents: 33928
diff changeset
    70
37947
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    71
    def testlossless0xed(self):
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    72
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    73
        s = u'\ud1bc'.encode('utf-8')
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    74
        l = encoding.tolocal(s)
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    75
        self.assertEqual(l, b'\xc5\xed')  # lossless
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    76
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8
3ea3c96ada54 encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents: 37946
diff changeset
    77
33926
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    78
if __name__ == '__main__':
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    79
    import silenttestrunner
f4433f2713d0 encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff changeset
    80
    silenttestrunner.main(__name__)