import-checker: open all source files as utf-8
Previously, we opened source files in text mode and relied on the
platform-dependent default encoding to decode their contents.
This caused decoding problems for some byte sequences in some
files.
This commit always opens the files as UTF-8, which
resolves the error.
test-check-module-imports.t now passes on Python 3.5 and 3.6
with this change.
Differential Revision: https://phab.mercurial-scm.org/D7225
--- a/contrib/import-checker.py Mon Nov 04 21:17:34 2019 -0800
+++ b/contrib/import-checker.py Mon Nov 04 20:46:19 2019 -0800
@@ -4,6 +4,7 @@
import ast
import collections
+import io
import os
import sys
@@ -754,7 +755,11 @@
yield src.read(), modname, f, 0
py = True
if py or f.endswith('.t'):
- with open(f, 'r') as src:
+ # Strictly speaking we should sniff for the magic header that denotes
+ # Python source file encoding. But in reality we don't use anything
+ # other than ASCII (mainly) and UTF-8 (in a few exceptions), so
+ # simplicity is fine.
+ with io.open(f, 'r', encoding='utf-8') as src:
for script, modname, t, line in embedded(f, modname, src):
yield script, modname.encode('utf8'), t, line