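Allow the user to specify the fallback encoding for the changelog
When a changelog entry cannot be decoded as UTF-8, the configured encoding
is tried in place of the previously hard-coded ISO-8859-1 (Latin-1).
Example: use EUC-JP instead of ISO-8859-1: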
[ui]
fallbackencoding = EUC-JP
--- a/doc/hgrc.5.txt Fri Dec 08 23:33:32 2006 +0100
+++ b/doc/hgrc.5.txt Fri Dec 08 22:01:05 2006 -0200
@@ -388,6 +388,9 @@
Print debugging information. True or False. Default is False.
editor;;
The editor to use during a commit. Default is $EDITOR or "vi".
+ fallbackencoding;;
+ Encoding to try, in place of the default ISO-8859-1, if it's not
+ possible to decode the changelog using UTF-8.
ignore;;
A file to read per-user ignore patterns from. This file should be in
the same format as a repository-wide .hgignore file. This option
--- a/mercurial/localrepo.py Fri Dec 08 23:33:32 2006 +0100
+++ b/mercurial/localrepo.py Fri Dec 08 22:01:05 2006 -0200
@@ -72,6 +72,10 @@
self.manifest = manifest.manifest(self.sopener, v)
self.changelog = changelog.changelog(self.sopener, v)
+ fallback = self.ui.config('ui', 'fallbackencoding')
+ if fallback:
+ util._fallbackencoding = fallback
+
# the changelog might not have the inline index flag
# on. If the format of the changelog is the same as found in
# .hgrc, apply any flags found in the .hgrc as well.
--- a/mercurial/util.py Fri Dec 08 23:33:32 2006 +0100
+++ b/mercurial/util.py Fri Dec 08 22:01:05 2006 -0200
@@ -19,6 +19,7 @@
_encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
_encodingmode = os.environ.get("HGENCODINGMODE", "strict")
+_fallbackencoding = 'ISO-8859-1'
def tolocal(s):
"""
@@ -30,7 +31,7 @@
using UTF-8, then Latin-1, and failing that, we use UTF-8 and
replace unknown characters.
"""
- for e in "utf-8 latin1".split():
+ for e in ('UTF-8', _fallbackencoding):
try:
u = s.decode(e) # attempt strict decoding
return u.encode(_encoding, "replace")
Binary file tests/legacy-encoding.hg has changed
--- a/tests/test-encoding Fri Dec 08 23:33:32 2006 +0100
+++ b/tests/test-encoding Fri Dec 08 22:01:05 2006 -0200
@@ -47,3 +47,8 @@
HGENCODING=latin-1 hg branches
echo % utf-8
HGENCODING=utf-8 hg branches
+
+echo '[ui]' >> .hg/hgrc
+echo 'fallbackencoding = euc-jp' >> .hg/hgrc
+echo % utf-8
+HGENCODING=utf-8 hg log
--- a/tests/test-encoding.out Fri Dec 08 23:33:32 2006 +0100
+++ b/tests/test-encoding.out Fri Dec 08 22:01:05 2006 -0200
@@ -1,7 +1,7 @@
adding changesets
adding manifests
adding file changes
-added 1 changesets with 1 changes to 1 files
+added 2 changesets with 2 changes to 1 files
(run 'hg update' to get a working copy)
1 files updated, 0 files merged, 0 files removed, 0 files unresolved
% should fail with encoding error
@@ -15,104 +15,153 @@
rollback completed
% these should work
% ascii
-changeset: 4:d8a5d9eaf41e
+changeset: 5:e4ed49b8a8f0
branch: ?
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
-changeset: 3:5edfc7acb541
+changeset: 4:a02ca5a58e99
user: test
date: Thu Jan 01 00:00:00 1970 +0000
-summary: Added tag ? for changeset 91878608adb3
+summary: Added tag ? for changeset d47908dab82f
-changeset: 2:91878608adb3
+changeset: 3:d47908dab82f
tag: ?
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: ?
-changeset: 1:6355cacf842e
+changeset: 2:9db1985f3097
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: ?
+changeset: 1:af6e0db4427c
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: euc-jp: ?????? = u'\u65e5\u672c\u8a9e'
+
changeset: 0:60aad1dd20a9
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e': ?
% latin-1
-changeset: 4:d8a5d9eaf41e
+changeset: 5:e4ed49b8a8f0
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
-changeset: 3:5edfc7acb541
+changeset: 4:a02ca5a58e99
user: test
date: Thu Jan 01 00:00:00 1970 +0000
-summary: Added tag é for changeset 91878608adb3
+summary: Added tag é for changeset d47908dab82f
-changeset: 2:91878608adb3
+changeset: 3:d47908dab82f
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
-changeset: 1:6355cacf842e
+changeset: 2:9db1985f3097
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
+changeset: 1:af6e0db4427c
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: euc-jp: ÆüËܸì = u'\u65e5\u672c\u8a9e'
+
changeset: 0:60aad1dd20a9
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e': é
% utf-8
-changeset: 4:d8a5d9eaf41e
+changeset: 5:e4ed49b8a8f0
branch: é
tag: tip
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin1 branch
-changeset: 3:5edfc7acb541
+changeset: 4:a02ca5a58e99
user: test
date: Thu Jan 01 00:00:00 1970 +0000
-summary: Added tag é for changeset 91878608adb3
+summary: Added tag é for changeset d47908dab82f
-changeset: 2:91878608adb3
+changeset: 3:d47908dab82f
tag: é
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: utf-8 e' encoded: é
-changeset: 1:6355cacf842e
+changeset: 2:9db1985f3097
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e' encoded: é
+changeset: 1:af6e0db4427c
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: euc-jp: ÆüËܸì = u'\u65e5\u672c\u8a9e'
+
changeset: 0:60aad1dd20a9
user: test
date: Thu Jan 01 00:00:00 1970 +0000
summary: latin-1 e': é
% ascii
-tip 4:d8a5d9eaf41e
-? 2:91878608adb3
+tip 5:e4ed49b8a8f0
+? 3:d47908dab82f
% latin-1
-tip 4:d8a5d9eaf41e
-é 2:91878608adb3
+tip 5:e4ed49b8a8f0
+é 3:d47908dab82f
+% utf-8
+tip 5:e4ed49b8a8f0
+é 3:d47908dab82f
+% ascii
+? 5:e4ed49b8a8f0
+% latin-1
+é 5:e4ed49b8a8f0
+% utf-8
+é 5:e4ed49b8a8f0
% utf-8
-tip 4:d8a5d9eaf41e
-é 2:91878608adb3
-% ascii
-? 4:d8a5d9eaf41e
-% latin-1
-é 4:d8a5d9eaf41e
-% utf-8
-é 4:d8a5d9eaf41e
+changeset: 5:e4ed49b8a8f0
+branch: é
+tag: tip
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin1 branch
+
+changeset: 4:a02ca5a58e99
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: Added tag é for changeset d47908dab82f
+
+changeset: 3:d47908dab82f
+tag: é
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: utf-8 e' encoded: é
+
+changeset: 2:9db1985f3097
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e' encoded: é
+
+changeset: 1:af6e0db4427c
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: euc-jp: 日本語 = u'\u65e5\u672c\u8a9e'
+
+changeset: 0:60aad1dd20a9
+user: test
+date: Thu Jan 01 00:00:00 1970 +0000
+summary: latin-1 e': �
+