# HG changeset patch # User Gregory Szorc # Date 1572929179 28800 # Node ID a8454e84673641e0132692df2d151a3804e8cc50 # Parent a0916e8819f6e8afc7e3ac966723811ae4057d44 import-checker: open all source files as utf-8 Before, we opened in text mode and used the default encoding to interpret the bytes within. This caused problems interpreting some byte sequences in some files. This commit changes things to always open files as UTF-8, which makes the error go away. test-check-module-imports.t now passes on Python 3.5 and 3.6 with this change. Differential Revision: https://phab.mercurial-scm.org/D7225 diff -r a0916e8819f6 -r a8454e846736 contrib/import-checker.py --- a/contrib/import-checker.py Mon Nov 04 21:17:34 2019 -0800 +++ b/contrib/import-checker.py Mon Nov 04 20:46:19 2019 -0800 @@ -4,6 +4,7 @@ import ast import collections +import io import os import sys @@ -754,7 +755,11 @@ yield src.read(), modname, f, 0 py = True if py or f.endswith('.t'): - with open(f, 'r') as src: + # Strictly speaking we should sniff for the magic header that denotes + # Python source file encoding. But in reality we don't use anything + # other than ASCII (mainly) and UTF-8 (in a few exceptions), so + # simplicity is fine. + with io.open(f, 'r', encoding='utf-8') as src: for script, modname, t, line in embedded(f, modname, src): yield script, modname.encode('utf8'), t, line