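Make transcoding more robust:
- default to ASCII when neither HGENCODING nor the locale yields an encoding
- abort with a clear message if the encoding is unknown
- wrap abort strings with _()
- add a test for an unknown encoding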
--- a/mercurial/util.py Sat Dec 09 18:49:24 2006 +0100
+++ b/mercurial/util.py Sat Dec 09 12:46:01 2006 -0600
@@ -17,7 +17,8 @@
demandload(globals(), "cStringIO errno getpass popen2 re shutil sys tempfile")
demandload(globals(), "os threading time calendar ConfigParser locale")
-_encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
+_encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding() \
+ or "ascii"
_encodingmode = os.environ.get("HGENCODINGMODE", "strict")
_fallbackencoding = 'ISO-8859-1'
@@ -35,6 +36,8 @@
try:
u = s.decode(e) # attempt strict decoding
return u.encode(_encoding, "replace")
+ except LookupError, k:
+ raise Abort(_("%s, please check your locale settings") % k)
except UnicodeDecodeError:
pass
u = s.decode("utf-8", "replace") # last ditch
@@ -54,7 +57,9 @@
return s.decode(_encoding, _encodingmode).encode("utf-8")
except UnicodeDecodeError, inst:
sub = s[max(0, inst.start-10):inst.start+10]
- raise Abort("decoding near '%s': %s!\n" % (sub, inst))
+ raise Abort("decoding near '%s': %s!" % (sub, inst))
+ except LookupError, k:
+ raise Abort(_("%s, please check your locale settings") % k)
def locallen(s):
"""Find the length in characters of a local string"""
@@ -70,7 +75,7 @@
return u.encode(_encoding, _encodingmode)
except UnicodeDecodeError, inst:
sub = s[max(0, inst.start-10), inst.start+10]
- raise Abort("decoding near '%s': %s!\n" % (sub, inst))
+ raise Abort(_("decoding near '%s': %s!\n") % (sub, inst))
# used by parsedate
defaultdateformats = (
--- a/tests/test-encoding Sat Dec 09 18:49:24 2006 +0100
+++ b/tests/test-encoding Sat Dec 09 12:46:01 2006 -0600
@@ -52,3 +52,5 @@
echo 'fallbackencoding = koi8-r' >> .hg/hgrc
echo % utf-8
HGENCODING=utf-8 hg log
+
+HGENCODING=dolphin hg log
\ No newline at end of file
--- a/tests/test-encoding.out Sat Dec 09 18:49:24 2006 +0100
+++ b/tests/test-encoding.out Sat Dec 09 12:46:01 2006 -0600
@@ -10,7 +10,6 @@
? latin-1-tag
? utf-8
abort: decoding near ' encoded: é': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)!
-
transaction abort!
rollback completed
% these should work
@@ -165,3 +164,4 @@
date: Mon Jan 12 13:46:40 1970 +0000
summary: latin-1 e': И = u'\xe9'
+abort: unknown encoding: dolphin, please check your locale settings