Mercurial > hg
annotate tests/test-encoding-func.py @ 47002:8fc2c867af54 stable
Added tag 5.8rc1 for changeset 8d2b62d716b0
author | Augie Fackler <raf@durin42.com> |
---|---|
date | Wed, 21 Apr 2021 10:49:28 -0400 |
parents | 2372284d9457 |
children | 6000f5b25c9b |
rev | line source |
---|---|
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
1 from __future__ import absolute_import |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
2 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
3 import unittest |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
4 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
5 from mercurial import encoding |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
6 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
7 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
8 class IsasciistrTest(unittest.TestCase): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
9 asciistrs = [ |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
10 b'a', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
11 b'ab', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
12 b'abc', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
13 b'abcd', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
14 b'abcde', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
15 b'abcdefghi', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
16 b'abcd\0fghi', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
17 ] |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
18 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
19 def testascii(self): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
20 for s in self.asciistrs: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
21 self.assertTrue(encoding.isasciistr(s)) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
22 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
23 def testnonasciichar(self): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
24 for s in self.asciistrs: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
25 for i in range(len(s)): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
26 t = bytearray(s) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
27 t[i] |= 0x80 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
28 self.assertFalse(encoding.isasciistr(bytes(t))) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
29 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
30 |
33927
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
31 class LocalEncodingTest(unittest.TestCase): |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
32 def testasciifastpath(self): |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
33 s = b'\0' * 100 |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
34 self.assertTrue(s is encoding.tolocal(s)) |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
35 self.assertTrue(s is encoding.fromlocal(s)) |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
36 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
37 |
33928
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
38 class Utf8bEncodingTest(unittest.TestCase): |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
39 def setUp(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
40 self.origencoding = encoding.encoding |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
41 |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
42 def tearDown(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
43 encoding.encoding = self.origencoding |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
44 |
33928
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
45 def testasciifastpath(self): |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
46 s = b'\0' * 100 |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
47 self.assertTrue(s is encoding.toutf8b(s)) |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
48 self.assertTrue(s is encoding.fromutf8b(s)) |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
49 |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
50 def testlossylatin(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
51 encoding.encoding = b'ascii' |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
52 s = u'\xc0'.encode('utf-8') |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
53 l = encoding.tolocal(s) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
54 self.assertEqual(l, b'?') # lossy |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
55 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
56 |
37947
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
57 def testlosslesslatin(self): |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
58 encoding.encoding = b'latin-1' |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
59 s = u'\xc0'.encode('utf-8') |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
60 l = encoding.tolocal(s) |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
61 self.assertEqual(l, b'\xc0') # lossless |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
62 self.assertEqual(s, encoding.toutf8b(l)) # convert back to utf-8 |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
63 |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
64 def testlossy0xed(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
65 encoding.encoding = b'euc-kr' # U+Dxxx Hangul |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
66 s = u'\ud1bc\xc0'.encode('utf-8') |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
67 l = encoding.tolocal(s) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
68 self.assertIn(b'\xed', l) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
69 self.assertTrue(l.endswith(b'?')) # lossy |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
70 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
71 |
37947
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
72 def testlossless0xed(self): |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
73 encoding.encoding = b'euc-kr' # U+Dxxx Hangul |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
74 s = u'\ud1bc'.encode('utf-8') |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
75 l = encoding.tolocal(s) |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
76 self.assertEqual(l, b'\xc5\xed') # lossless |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
77 self.assertEqual(s, encoding.toutf8b(l)) # convert back to utf-8 |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
78 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
79 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
80 if __name__ == '__main__': |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
81 import silenttestrunner |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
82 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
83 silenttestrunner.main(__name__) |