compat: initialize LC_CTYPE locale on all Python versions and platforms
Previously, the LC_CTYPE locale was initialized according to user settings only
on some Python versions (e.g. never on Python 2) and platforms (e.g. not with
some Python < 3.8 versions on Windows).
This broke e.g. non-ASCII filenames passed to the Subversion bindings on Python
2, resulting in error messages like "file:///tmp/a%C3%A4 does not look like a
Subversion repository to libsvn version 1.14.0".
The following command could be used to test this functionality. Adding it to the
test suite would be pointless, as the locale is always set to "C" during test
runs.
@command(b'check_initial_codeset', norepo=True)
def check_initial_codeset(ui):
    # Codeset in effect when the command starts running.
    initial = locale.nl_langinfo(locale.CODESET)
    # Explicitly apply the user settings, then query the codeset again.
    locale.setlocale(locale.LC_ALL, '')
    explicit = locale.nl_langinfo(locale.CODESET)
    # The two agree only if the codeset was already the one selected by the
    # user settings before the explicit setlocale() call.
    assert initial == explicit
--- a/mercurial/pycompat.py Thu Jun 25 10:32:51 2020 -0700
+++ b/mercurial/pycompat.py Fri Jun 26 04:07:50 2020 +0200
@@ -13,6 +13,7 @@
import getopt
import inspect
import json
+import locale
import os
import shlex
import sys
@@ -93,6 +94,26 @@
return _rapply(f, xs)
+# Passing '' as the locale means that the locale should be set according to
+# the user settings (environment variables).
+# Python sometimes avoids setting the global locale settings. When interfacing
+# with C code (e.g. the curses module or the Subversion bindings), the global
+# locale settings must be initialized correctly. Python 2 does not initialize
+# the global locale settings on interpreter startup. Python 3 sometimes
+# initializes LC_CTYPE, but not consistently, at least on Windows. Therefore,
+# if LC_CTYPE is still 'C' (i.e. not already initialized), we explicitly
+# initialize it to get consistent behavior. Since CPython commit
+# 177d921c8c03d30daa32994362023f777624b10d, LC_CTYPE is always initialized. If
+# we require Python 3.8+, we should re-check if we can remove this code.
+if locale.setlocale(locale.LC_CTYPE, None) == 'C':
+ try:
+ locale.setlocale(locale.LC_CTYPE, '')
+ except locale.Error:
+ # The likely case is that the locale named by the environment variables
+ # is unknown. Ignore the failure (best effort).
+ pass
+
+
if ispy3:
import builtins
import codecs