make transcoding more robust
author Matt Mackall <mpm@selenic.com>
Sat, 09 Dec 2006 12:46:01 -0600
changeset 3843 abaa2cd00d2b
parent 3842 47c634bf1e92
child 3844 3ba82c3f4bc3
make transcoding more robust

- default to ASCII
- abort if unknown encoding
- wrap abort strings with _
- add test
mercurial/util.py
tests/test-encoding
tests/test-encoding.out
--- a/mercurial/util.py	Sat Dec 09 18:49:24 2006 +0100
+++ b/mercurial/util.py	Sat Dec 09 12:46:01 2006 -0600
@@ -17,7 +17,8 @@
 demandload(globals(), "cStringIO errno getpass popen2 re shutil sys tempfile")
 demandload(globals(), "os threading time calendar ConfigParser locale")
 
-_encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding()
+_encoding = os.environ.get("HGENCODING") or locale.getpreferredencoding() \
+            or "ascii"
 _encodingmode = os.environ.get("HGENCODINGMODE", "strict")
 _fallbackencoding = 'ISO-8859-1'
 
@@ -35,6 +36,8 @@
         try:
             u = s.decode(e) # attempt strict decoding
             return u.encode(_encoding, "replace")
+        except LookupError, k:
+            raise Abort(_("%s, please check your locale settings") % k)
         except UnicodeDecodeError:
             pass
     u = s.decode("utf-8", "replace") # last ditch
@@ -54,7 +57,9 @@
         return s.decode(_encoding, _encodingmode).encode("utf-8")
     except UnicodeDecodeError, inst:
         sub = s[max(0, inst.start-10):inst.start+10]
-        raise Abort("decoding near '%s': %s!\n" % (sub, inst))
+        raise Abort("decoding near '%s': %s!" % (sub, inst))
+    except LookupError, k:
+        raise Abort(_("%s, please check your locale settings") % k)
 
 def locallen(s):
     """Find the length in characters of a local string"""
@@ -70,7 +75,7 @@
         return u.encode(_encoding, _encodingmode)
     except UnicodeDecodeError, inst:
         sub = s[max(0, inst.start-10), inst.start+10]
-        raise Abort("decoding near '%s': %s!\n" % (sub, inst))
+        raise Abort(_("decoding near '%s': %s!\n") % (sub, inst))
 
 # used by parsedate
 defaultdateformats = (
--- a/tests/test-encoding	Sat Dec 09 18:49:24 2006 +0100
+++ b/tests/test-encoding	Sat Dec 09 12:46:01 2006 -0600
@@ -52,3 +52,5 @@
 echo 'fallbackencoding = koi8-r' >> .hg/hgrc
 echo % utf-8
 HGENCODING=utf-8 hg log
+
+HGENCODING=dolphin hg log
\ No newline at end of file
--- a/tests/test-encoding.out	Sat Dec 09 18:49:24 2006 +0100
+++ b/tests/test-encoding.out	Sat Dec 09 12:46:01 2006 -0600
@@ -10,7 +10,6 @@
 ? latin-1-tag
 ? utf-8
 abort: decoding near ' encoded: é': 'ascii' codec can't decode byte 0xe9 in position 20: ordinal not in range(128)!
-
 transaction abort!
 rollback completed
 % these should work
@@ -165,3 +164,4 @@
 date:        Mon Jan 12 13:46:40 1970 +0000
 summary:     latin-1 e': И = u'\xe9'
 
+abort: unknown encoding: dolphin, please check your locale settings