Allow the user to specify the fallback encoding for the changelog
author Alexis S. L. Carvalho <alexis@cecm.usp.br>
Fri, 08 Dec 2006 22:01:05 -0200
changeset 3835 d1ce5461beed
parent 3834 a7b61c3b0f93
child 3836 925b1816c746
Allow the user to specify the fallback encoding for the changelog

Example: use EUC-JP instead of ISO-8859-1:

[ui]
fallbackencoding = EUC-JP
doc/hgrc.5.txt
mercurial/localrepo.py
mercurial/util.py
tests/legacy-encoding.hg
tests/test-encoding
tests/test-encoding.out
--- a/doc/hgrc.5.txt	Fri Dec 08 23:33:32 2006 +0100
+++ b/doc/hgrc.5.txt	Fri Dec 08 22:01:05 2006 -0200
@@ -388,6 +388,9 @@
     Print debugging information.  True or False.  Default is False.
   editor;;
     The editor to use during a commit.  Default is $EDITOR or "vi".
+  fallbackencoding;;
+    Encoding to try if it's not possible to decode the changelog using
+    UTF-8.  Default is ISO-8859-1.
   ignore;;
     A file to read per-user ignore patterns from. This file should be in
     the same format as a repository-wide .hgignore file. This option
--- a/mercurial/localrepo.py	Fri Dec 08 23:33:32 2006 +0100
+++ b/mercurial/localrepo.py	Fri Dec 08 22:01:05 2006 -0200
@@ -72,6 +72,10 @@
         self.manifest = manifest.manifest(self.sopener, v)
         self.changelog = changelog.changelog(self.sopener, v)
 
+        fallback = self.ui.config('ui', 'fallbackencoding')
+        if fallback:
+            util._fallbackencoding = fallback
+
         # the changelog might not have the inline index flag
         # on.  If the format of the changelog is the same as found in
         # .hgrc, apply any flags found in the .hgrc as well.
--- a/mercurial/util.py	Fri Dec 08 23:33:32 2006 +0100
+++ b/mercurial/util.py	Fri Dec 08 22:01:05 2006 -0200
@@ -19,6 +19,7 @@
 
 _encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
+_fallbackencoding = 'ISO-8859-1'
 
 def tolocal(s):
     """
@@ -30,7 +31,7 @@
     using UTF-8, then Latin-1, and failing that, we use UTF-8 and
     replace unknown characters.
     """
-    for e in "utf-8 latin1".split():
+    for e in ('UTF-8', _fallbackencoding):
         try:
             u = s.decode(e) # attempt strict decoding
             return u.encode(_encoding, "replace")
Binary file tests/legacy-encoding.hg has changed
--- a/tests/test-encoding	Fri Dec 08 23:33:32 2006 +0100
+++ b/tests/test-encoding	Fri Dec 08 22:01:05 2006 -0200
@@ -47,3 +47,8 @@
 HGENCODING=latin-1 hg branches
 echo % utf-8
 HGENCODING=utf-8 hg branches
+
+echo '[ui]' >> .hg/hgrc
+echo 'fallbackencoding = euc-jp' >> .hg/hgrc
+echo % utf-8
+HGENCODING=utf-8 hg log
--- a/tests/test-encoding.out	Fri Dec 08 23:33:32 2006 +0100
+++ b/tests/test-encoding.out	Fri Dec 08 22:01:05 2006 -0200
@@ -1,7 +1,7 @@
 adding changesets
 adding manifests
 adding file changes
-added 1 changesets with 1 changes to 1 files
+added 2 changesets with 2 changes to 1 files
 (run 'hg update' to get a working copy)
 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
 % should fail with encoding error
@@ -15,104 +15,153 @@
 rollback completed
 % these should work
 % ascii
-changeset:   4:d8a5d9eaf41e
+changeset:   5:e4ed49b8a8f0
 branch:      ?
 tag:         tip
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin1 branch
 
-changeset:   3:5edfc7acb541
+changeset:   4:a02ca5a58e99
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
-summary:     Added tag ? for changeset 91878608adb3
+summary:     Added tag ? for changeset d47908dab82f
 
-changeset:   2:91878608adb3
+changeset:   3:d47908dab82f
 tag:         ?
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     utf-8 e' encoded: ?
 
-changeset:   1:6355cacf842e
+changeset:   2:9db1985f3097
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin-1 e' encoded: ?
 
+changeset:   1:af6e0db4427c
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     euc-jp: ?????? = u'\u65e5\u672c\u8a9e'
+
 changeset:   0:60aad1dd20a9
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin-1 e': ?
 
 % latin-1
-changeset:   4:d8a5d9eaf41e
+changeset:   5:e4ed49b8a8f0
 branch:      é
 tag:         tip
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin1 branch
 
-changeset:   3:5edfc7acb541
+changeset:   4:a02ca5a58e99
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
-summary:     Added tag é for changeset 91878608adb3
+summary:     Added tag é for changeset d47908dab82f
 
-changeset:   2:91878608adb3
+changeset:   3:d47908dab82f
 tag:         é
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     utf-8 e' encoded: é
 
-changeset:   1:6355cacf842e
+changeset:   2:9db1985f3097
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin-1 e' encoded: é
 
+changeset:   1:af6e0db4427c
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     euc-jp: ÆüËܸì = u'\u65e5\u672c\u8a9e'
+
 changeset:   0:60aad1dd20a9
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin-1 e': é
 
 % utf-8
-changeset:   4:d8a5d9eaf41e
+changeset:   5:e4ed49b8a8f0
 branch:      é
 tag:         tip
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin1 branch
 
-changeset:   3:5edfc7acb541
+changeset:   4:a02ca5a58e99
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
-summary:     Added tag é for changeset 91878608adb3
+summary:     Added tag é for changeset d47908dab82f
 
-changeset:   2:91878608adb3
+changeset:   3:d47908dab82f
 tag:         é
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     utf-8 e' encoded: é
 
-changeset:   1:6355cacf842e
+changeset:   2:9db1985f3097
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin-1 e' encoded: é
 
+changeset:   1:af6e0db4427c
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     euc-jp: ÆüËܸì = u'\u65e5\u672c\u8a9e'
+
 changeset:   0:60aad1dd20a9
 user:        test
 date:        Thu Jan 01 00:00:00 1970 +0000
 summary:     latin-1 e': é
 
 % ascii
-tip                                4:d8a5d9eaf41e
-?                                  2:91878608adb3
+tip                                5:e4ed49b8a8f0
+?                                  3:d47908dab82f
 % latin-1
-tip                                4:d8a5d9eaf41e
-é                                  2:91878608adb3
+tip                                5:e4ed49b8a8f0
+é                                  3:d47908dab82f
+% utf-8
+tip                                5:e4ed49b8a8f0
+é                                  3:d47908dab82f
+% ascii
+?                              5:e4ed49b8a8f0
+% latin-1
+é                              5:e4ed49b8a8f0
+% utf-8
+é                              5:e4ed49b8a8f0
 % utf-8
-tip                                4:d8a5d9eaf41e
-é                                  2:91878608adb3
-% ascii
-?                              4:d8a5d9eaf41e
-% latin-1
-é                              4:d8a5d9eaf41e
-% utf-8
-é                              4:d8a5d9eaf41e
+changeset:   5:e4ed49b8a8f0
+branch:      é
+tag:         tip
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     latin1 branch
+
+changeset:   4:a02ca5a58e99
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     Added tag é for changeset d47908dab82f
+
+changeset:   3:d47908dab82f
+tag:         é
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     utf-8 e' encoded: é
+
+changeset:   2:9db1985f3097
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     latin-1 e' encoded: é
+
+changeset:   1:af6e0db4427c
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     euc-jp: 日本語 = u'\u65e5\u672c\u8a9e'
+
+changeset:   0:60aad1dd20a9
+user:        test
+date:        Thu Jan 01 00:00:00 1970 +0000
+summary:     latin-1 e': �
+