changeset 43415:a8454e846736 stable

import-checker: open all source files as utf-8

Before, we opened in text mode and used the default encoding to interpret the bytes within. This caused problems interpreting some byte sequences in some files.

This commit changes things to always open files as UTF-8, which makes the error go away.

test-check-module-imports.t now passes on Python 3.5 and 3.6 with this change.

Differential Revision: https://phab.mercurial-scm.org/D7225
author Gregory Szorc <gregory.szorc@gmail.com>
date Mon, 04 Nov 2019 20:46:19 -0800
parents a0916e8819f6
children e7eb67eab53f
files contrib/import-checker.py
diffstat 1 files changed, 6 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/contrib/import-checker.py	Mon Nov 04 21:17:34 2019 -0800
+++ b/contrib/import-checker.py	Mon Nov 04 20:46:19 2019 -0800
@@ -4,6 +4,7 @@
 
 import ast
 import collections
+import io
 import os
 import sys
 
@@ -754,7 +755,11 @@
             yield src.read(), modname, f, 0
             py = True
     if py or f.endswith('.t'):
-        with open(f, 'r') as src:
+        # Strictly speaking we should sniff for the magic header that denotes
+        # Python source file encoding. But in reality we don't use anything
+        # other than ASCII (mainly) and UTF-8 (in a few exceptions), so
+        # simplicity is fine.
+        with io.open(f, 'r', encoding='utf-8') as src:
             for script, modname, t, line in embedded(f, modname, src):
                 yield script, modname.encode('utf8'), t, line