changeset 11893:aa50d07208d2

Merge with stable
author Patrick Mezard <pmezard@gmail.com>
date Sun, 15 Aug 2010 18:50:19 +0200
parents 2bd699886ffc (current diff) 2be70ca17311 (diff)
children a15936ac7ec5
files tests/test-hgweb-commands.out
diffstat 4 files changed, 41 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/mercurial/encoding.py	Sun Aug 15 18:25:29 2010 +0200
+++ b/mercurial/encoding.py	Sun Aug 15 18:50:19 2010 +0200
@@ -8,21 +8,41 @@
 import error
 import sys, unicodedata, locale, os
 
-_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}
+def _getpreferredencoding():
+    '''
+    On darwin, getpreferredencoding ignores the locale environment and
+    always returns mac-roman. http://bugs.python.org/issue6202 fixes this
+    for Python 2.7 and up. This is the same corrected code for earlier
+    Python versions.
+
+    However, we can't use a version check for this method, as some distributions
+    patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman
+    encoding, as it is unlikely that this encoding is the one actually expected.
+    '''
+    try:
+        locale.CODESET
+    except AttributeError:
+        # Fall back to parsing environment variables :-(
+        return locale.getdefaultlocale()[1]
+
+    oldloc = locale.setlocale(locale.LC_CTYPE)
+    locale.setlocale(locale.LC_CTYPE, "")
+    result = locale.nl_langinfo(locale.CODESET)
+    locale.setlocale(locale.LC_CTYPE, oldloc)
+
+    return result
+
+_encodingfixers = {
+    '646': lambda: 'ascii',
+    'ANSI_X3.4-1968': lambda: 'ascii',
+    'mac-roman': _getpreferredencoding
+}
 
 try:
     encoding = os.environ.get("HGENCODING")
-    if sys.platform == 'darwin' and not encoding:
-        # On darwin, getpreferredencoding ignores the locale environment and
-        # always returns mac-roman. We override this if the environment is
-        # not C (has been customized by the user).
-        lc = locale.setlocale(locale.LC_CTYPE, '')
-        if lc == 'UTF-8':
-            locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8')
-        encoding = locale.getlocale()[1]
     if not encoding:
         encoding = locale.getpreferredencoding() or 'ascii'
-        encoding = _encodingfixup.get(encoding, encoding)
+        encoding = _encodingfixers.get(encoding, lambda: encoding)()
 except locale.Error:
     encoding = 'ascii'
 encodingmode = os.environ.get("HGENCODINGMODE", "strict")
--- a/mercurial/templatefilters.py	Sun Aug 15 18:25:29 2010 +0200
+++ b/mercurial/templatefilters.py	Sun Aug 15 18:50:19 2010 +0200
@@ -140,6 +140,12 @@
             .replace("'", '&#39;')) # &apos; invalid in HTML
     return re.sub('[\x00-\x08\x0B\x0C\x0E-\x1F]', ' ', text)
 
+def uescape(c):
+    if ord(c) < 0x80:
+        return c
+    else:
+        return '\\u%04x' % ord(c)
+
 _escapes = [
     ('\\', '\\\\'), ('"', '\\"'), ('\t', '\\t'), ('\n', '\\n'),
     ('\r', '\\r'), ('\f', '\\f'), ('\b', '\\b'),
@@ -148,7 +154,7 @@
 def jsonescape(s):
     for k, v in _escapes:
         s = s.replace(k, v)
-    return s
+    return ''.join(uescape(c) for c in s)
 
 def json(obj):
     if obj is None or obj is False or obj is True:
@@ -157,9 +163,9 @@
         return str(obj)
     elif isinstance(obj, str):
         u = unicode(obj, encoding.encoding, 'replace')
-        return '"%s"' % jsonescape(u).encode('utf-8')
+        return '"%s"' % jsonescape(u)
     elif isinstance(obj, unicode):
-        return '"%s"' % jsonescape(obj).encode('utf-8')
+        return '"%s"' % jsonescape(obj)
     elif hasattr(obj, 'keys'):
         out = []
         for k, v in obj.iteritems():
--- a/tests/test-hgweb-commands.out	Sun Aug 15 18:25:29 2010 +0200
+++ b/tests/test-hgweb-commands.out	Sun Aug 15 18:50:19 2010 +0200
@@ -984,5 +984,5 @@
 }
 % Stop and restart with HGENCODING=cp932
 % Graph json escape of multibyte character
-var data = [["40b4d6888e92", [0, 1], [[0, 0, 1]], "能", "test", "1970-01-01", ["stable", true], ["tip"]], ["1d22e65f027e", [0, 1], [[0, 0, 1]], "branch", "test", "1970-01-01", ["stable", false], []], ["a4f92ed23982", [0, 1], [[0, 0, 1]], "Added tag 1.0 for changeset 2ef0ac749a14", "test", "1970-01-01", ["default", true], []], ["2ef0ac749a14", [0, 1], [], "base", "test", "1970-01-01", ["default", false], ["1.0"]]];
+var data = [["40b4d6888e92", [0, 1], [[0, 0, 1]], "\u80fd", "test", "1970-01-01", ["stable", true], ["tip"]], ["1d22e65f027e", [0, 1], [[0, 0, 1]], "branch", "test", "1970-01-01", ["stable", false], []], ["a4f92ed23982", [0, 1], [[0, 0, 1]], "Added tag 1.0 for changeset 2ef0ac749a14", "test", "1970-01-01", ["default", true], []], ["2ef0ac749a14", [0, 1], [], "base", "test", "1970-01-01", ["default", false], ["1.0"]]];
 % ERRORS ENCOUNTERED
--- a/tests/test-notify	Sun Aug 15 18:25:29 2010 +0200
+++ b/tests/test-notify	Sun Aug 15 18:50:19 2010 +0200
@@ -41,7 +41,7 @@
 # of the very long subject line
 echo '% pull (minimal config)'
 hg --traceback --cwd b pull ../a 2>&1 |
-  python -c 'import sys,re; print re.sub("\n\t", " ", sys.stdin.read()),' |
+  python -c 'import sys,re; print re.sub("([n:])\\n[\\t ]", "\\1 ", sys.stdin.read()),' |
   sed -e 's/\(Message-Id:\).*/\1/' \
   -e 's/changeset \([0-9a-f]* *\)in .*test-notif/changeset \1in test-notif/' \
   -e 's/^details: .*test-notify/details: test-notify/' \