Mercurial > hg
annotate tests/test-encoding-func.py @ 42194:0da689a60163
fix: allow fixer tools to return metadata in addition to the file content
With this change, fixer tools can be configured to output a JSON object that
will be parsed and passed to hooks that can be used to print summaries of what
code was formatted or perform other post-fixing work.
The motivation for this change is to allow parallel executions of a
"meta-formatter" tool to report back statistics, which are then aggregated and
processed after all formatting has completed. Providing an extensible mechanism
inside fix.py is far simpler, and more portable, than trying to make a tool
like this communicate through some other channel.
Differential Revision: https://phab.mercurial-scm.org/D6167
author | Danny Hooper <hooper@google.com> |
---|---|
date | Thu, 21 Mar 2019 18:32:45 -0700 |
parents | 3ea3c96ada54 |
children | 2372284d9457 |
rev | line source |
---|---|
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
1 from __future__ import absolute_import |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
2 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
3 import unittest |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
4 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
5 from mercurial import ( |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
6 encoding, |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
7 ) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
8 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
9 class IsasciistrTest(unittest.TestCase): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
10 asciistrs = [ |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
11 b'a', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
12 b'ab', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
13 b'abc', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
14 b'abcd', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
15 b'abcde', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
16 b'abcdefghi', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
17 b'abcd\0fghi', |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
18 ] |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
19 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
20 def testascii(self): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
21 for s in self.asciistrs: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
22 self.assertTrue(encoding.isasciistr(s)) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
23 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
24 def testnonasciichar(self): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
25 for s in self.asciistrs: |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
26 for i in range(len(s)): |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
27 t = bytearray(s) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
28 t[i] |= 0x80 |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
29 self.assertFalse(encoding.isasciistr(bytes(t))) |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
30 |
33927
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
31 class LocalEncodingTest(unittest.TestCase): |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
32 def testasciifastpath(self): |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
33 s = b'\0' * 100 |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
34 self.assertTrue(s is encoding.tolocal(s)) |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
35 self.assertTrue(s is encoding.fromlocal(s)) |
853574db5b12
encoding: add fast path of from/tolocal() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33926
diff
changeset
|
36 |
33928
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
37 class Utf8bEncodingTest(unittest.TestCase): |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
38 def setUp(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
39 self.origencoding = encoding.encoding |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
40 |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
41 def tearDown(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
42 encoding.encoding = self.origencoding |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
43 |
33928
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
44 def testasciifastpath(self): |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
45 s = b'\0' * 100 |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
46 self.assertTrue(s is encoding.toutf8b(s)) |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
47 self.assertTrue(s is encoding.fromutf8b(s)) |
6c119dbfd0c0
encoding: add fast path of from/toutf8b() for ASCII strings
Yuya Nishihara <yuya@tcha.org>
parents:
33927
diff
changeset
|
48 |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
49 def testlossylatin(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
50 encoding.encoding = b'ascii' |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
51 s = u'\xc0'.encode('utf-8') |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
52 l = encoding.tolocal(s) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
53 self.assertEqual(l, b'?') # lossy |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
54 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
55 |
37947
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
56 def testlosslesslatin(self): |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
57 encoding.encoding = b'latin-1' |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
58 s = u'\xc0'.encode('utf-8') |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
59 l = encoding.tolocal(s) |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
60 self.assertEqual(l, b'\xc0') # lossless |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
61 self.assertEqual(s, encoding.toutf8b(l)) # convert back to utf-8 |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
62 |
37946
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
63 def testlossy0xed(self): |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
64 encoding.encoding = b'euc-kr' # U+Dxxx Hangul |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
65 s = u'\ud1bc\xc0'.encode('utf-8') |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
66 l = encoding.tolocal(s) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
67 self.assertIn(b'\xed', l) |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
68 self.assertTrue(l.endswith(b'?')) # lossy |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
69 self.assertEqual(s, encoding.toutf8b(l)) # utf8 sequence preserved |
57b0c7221dba
encoding: fix toutf8b() to resurrect lossy characters even if "\xed" in it
Yuya Nishihara <yuya@tcha.org>
parents:
33928
diff
changeset
|
70 |
37947
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
71 def testlossless0xed(self): |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
72 encoding.encoding = b'euc-kr' # U+Dxxx Hangul |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
73 s = u'\ud1bc'.encode('utf-8') |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
74 l = encoding.tolocal(s) |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
75 self.assertEqual(l, b'\xc5\xed') # lossless |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
76 self.assertEqual(s, encoding.toutf8b(l)) # convert back to utf-8 |
3ea3c96ada54
encoding: introduce tagging type for non-lossy non-ASCII string
Yuya Nishihara <yuya@tcha.org>
parents:
37946
diff
changeset
|
77 |
33926
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
78 if __name__ == '__main__': |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
79 import silenttestrunner |
f4433f2713d0
encoding: add function to test if a str consists of ASCII characters
Yuya Nishihara <yuya@tcha.org>
parents:
diff
changeset
|
80 silenttestrunner.main(__name__) |