Mercurial > hg
annotate tests/test-encoding-func.py @ 50970:f04568add4ee
debug_revlog_stats: migrate `opts` to native kwargs
author | Matt Harbison <matt_harbison@yahoo.com> |
---|---|
date | Mon, 21 Aug 2023 17:47:52 -0400 |
parents | 6000f5b25c9b |
children |
rev | line source |
---|---|
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
1 import unittest |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
2 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
3 from mercurial import encoding |
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
4 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
5 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
6 class IsasciistrTest(unittest.TestCase): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
7 asciistrs = [ |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
8 b'a', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
9 b'ab', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
10 b'abc', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
11 b'abcd', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
12 b'abcde', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
13 b'abcdefghi', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
14 b'abcd\0fghi', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
15 ] |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
16 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
17 def testascii(self): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
18 for s in self.asciistrs: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
19 self.assertTrue(encoding.isasciistr(s)) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
20 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
21 def testnonasciichar(self): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
22 for s in self.asciistrs: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
23 for i in range(len(s)): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
24 t = bytearray(s) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
25 t[i] |= 0x80 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
26 self.assertFalse(encoding.isasciistr(bytes(t))) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
27 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
28 |
33927
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
29 class LocalEncodingTest(unittest.TestCase): |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
30 def testasciifastpath(self): |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
31 s = b'\0' * 100 |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
32 self.assertTrue(s is encoding.tolocal(s)) |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
33 self.assertTrue(s is encoding.fromlocal(s)) |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
34 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
35 |
33928
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
36 class Utf8bEncodingTest(unittest.TestCase): |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
37 def setUp(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
38 self.origencoding = encoding.encoding |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
39 |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
40 def tearDown(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
41 encoding.encoding = self.origencoding |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
42 |
33928
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
43 def testasciifastpath(self): |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
44 s = b'\0' * 100 |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
45 self.assertTrue(s is encoding.toutf8b(s)) |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
46 self.assertTrue(s is encoding.fromutf8b(s)) |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
47 |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
48 def testlossylatin(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
49 encoding.encoding = b'ascii' |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
50 s = u'\xc0'.encode('utf-8') |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
51 l = encoding.tolocal(s) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
52 self.assertEqual(l, b'?') # lossy |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
53 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
54 |
37947
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
55 def testlosslesslatin(self): |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
56 encoding.encoding = b'latin-1' |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
57 s = u'\xc0'.encode('utf-8') |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
58 l = encoding.tolocal(s) |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
59 self.assertEqual(l, b'\xc0') # lossless |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
60 self.assertEqual(s, encoding.toutf8b(l)) # convert back to utf-8 |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
61 |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
62 def testlossy0xed(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
63 encoding.encoding = b'euc-kr' # U+Dxxx Hangul |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
64 s = u'\ud1bc\xc0'.encode('utf-8') |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
65 l = encoding.tolocal(s) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
66 self.assertIn(b'\xed', l) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
67 self.assertTrue(l.endswith(b'?')) # lossy |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
68 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
69 |
37947
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
70 def testlossless0xed(self): |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
71 encoding.encoding = b'euc-kr' # U+Dxxx Hangul |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
72 s = u'\ud1bc'.encode('utf-8') |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
73 l = encoding.tolocal(s) |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
74 self.assertEqual(l, b'\xc5\xed') # lossless |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
75 self.assertEqual(s, encoding.toutf8b(l)) # convert back to utf-8 |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
76 |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
77 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
78 if __name__ == '__main__': |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
79 import silenttestrunner |
43076
2372284d9457
formatting: blacken the codebase
Augie Fackler <augie@google.com>
parents:
37947
diff
changeset
|
80 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
81 silenttestrunner.main(__name__) |