# HG changeset patch # User Patrick Mezard # Date 1281891019 -7200 # Node ID aa50d07208d2be696a59ea68c8889a6dc8925ebc # Parent 2bd699886ffc6048825e4bfd8cf05271ba7b2b69# Parent 2be70ca1731165935aaf751c60f161655c87a7f5 Merge with stable diff -r 2bd699886ffc -r aa50d07208d2 mercurial/encoding.py --- a/mercurial/encoding.py Sun Aug 15 18:25:29 2010 +0200 +++ b/mercurial/encoding.py Sun Aug 15 18:50:19 2010 +0200 @@ -8,21 +8,41 @@ import error import sys, unicodedata, locale, os -_encodingfixup = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'} +def _getpreferredencoding(): + ''' + On darwin, getpreferredencoding ignores the locale environment and + always returns mac-roman. http://bugs.python.org/issue6202 fixes this + for Python 2.7 and up. This is the same corrected code for earlier + Python versions. + + However, we can't use a version check for this method, as some distributions + patch Python to fix this. Instead, we use it as a 'fixer' for the mac-roman + encoding, as it is unlikely that this encoding is the actually expected. + ''' + try: + locale.CODESET + except AttributeError: + # Fall back to parsing environment variables :-( + return locale.getdefaultlocale()[1] + + oldloc = locale.setlocale(locale.LC_CTYPE) + locale.setlocale(locale.LC_CTYPE, "") + result = locale.nl_langinfo(locale.CODESET) + locale.setlocale(locale.LC_CTYPE, oldloc) + + return result + +_encodingfixers = { + '646': lambda: 'ascii', + 'ANSI_X3.4-1968': lambda: 'ascii', + 'mac-roman': _getpreferredencoding +} try: encoding = os.environ.get("HGENCODING") - if sys.platform == 'darwin' and not encoding: - # On darwin, getpreferredencoding ignores the locale environment and - # always returns mac-roman. We override this if the environment is - # not C (has been customized by the user). 
- lc = locale.setlocale(locale.LC_CTYPE, '') - if lc == 'UTF-8': - locale.setlocale(locale.LC_CTYPE, 'en_US.UTF-8') - encoding = locale.getlocale()[1] if not encoding: encoding = locale.getpreferredencoding() or 'ascii' - encoding = _encodingfixup.get(encoding, encoding) + encoding = _encodingfixers.get(encoding, lambda: encoding)() except locale.Error: encoding = 'ascii' encodingmode = os.environ.get("HGENCODINGMODE", "strict") diff -r 2bd699886ffc -r aa50d07208d2 mercurial/templatefilters.py --- a/mercurial/templatefilters.py Sun Aug 15 18:25:29 2010 +0200 +++ b/mercurial/templatefilters.py Sun Aug 15 18:50:19 2010 +0200 @@ -140,6 +140,12 @@ .replace("'", '&#39;')) # &apos; invalid in HTML return re.sub('[\x00-\x08\x0B\x0C\x0E-\x1F]', ' ', text) +def uescape(c): + if ord(c) < 0x80: + return c + else: + return '\\u%04x' % ord(c) + _escapes = [ ('\\', '\\\\'), ('"', '\\"'), ('\t', '\\t'), ('\n', '\\n'), ('\r', '\\r'), ('\f', '\\f'), ('\b', '\\b'), @@ -148,7 +154,7 @@ def jsonescape(s): for k, v in _escapes: s = s.replace(k, v) - return s + return ''.join(uescape(c) for c in s) def json(obj): if obj is None or obj is False or obj is True: @@ -157,9 +163,9 @@ return str(obj) elif isinstance(obj, str): u = unicode(obj, encoding.encoding, 'replace') - return '"%s"' % jsonescape(u).encode('utf-8') + return '"%s"' % jsonescape(u) elif isinstance(obj, unicode): - return '"%s"' % jsonescape(obj).encode('utf-8') + return '"%s"' % jsonescape(obj) elif hasattr(obj, 'keys'): out = [] for k, v in obj.iteritems(): diff -r 2bd699886ffc -r aa50d07208d2 tests/test-hgweb-commands.out --- a/tests/test-hgweb-commands.out Sun Aug 15 18:25:29 2010 +0200 +++ b/tests/test-hgweb-commands.out Sun Aug 15 18:50:19 2010 +0200 @@ -984,5 +984,5 @@ } % Stop and restart with HGENCODING=cp932 % Graph json escape of multibyte character -var data = [["40b4d6888e92", [0, 1], [[0, 0, 1]], "能", "test", "1970-01-01", ["stable", true], ["tip"]], ["1d22e65f027e", [0, 1], [[0, 0, 1]], "branch", "test", "1970-01-01", 
["stable", false], []], ["a4f92ed23982", [0, 1], [[0, 0, 1]], "Added tag 1.0 for changeset 2ef0ac749a14", "test", "1970-01-01", ["default", true], []], ["2ef0ac749a14", [0, 1], [], "base", "test", "1970-01-01", ["default", false], ["1.0"]]]; +var data = [["40b4d6888e92", [0, 1], [[0, 0, 1]], "\u80fd", "test", "1970-01-01", ["stable", true], ["tip"]], ["1d22e65f027e", [0, 1], [[0, 0, 1]], "branch", "test", "1970-01-01", ["stable", false], []], ["a4f92ed23982", [0, 1], [[0, 0, 1]], "Added tag 1.0 for changeset 2ef0ac749a14", "test", "1970-01-01", ["default", true], []], ["2ef0ac749a14", [0, 1], [], "base", "test", "1970-01-01", ["default", false], ["1.0"]]]; % ERRORS ENCOUNTERED diff -r 2bd699886ffc -r aa50d07208d2 tests/test-notify --- a/tests/test-notify Sun Aug 15 18:25:29 2010 +0200 +++ b/tests/test-notify Sun Aug 15 18:50:19 2010 +0200 @@ -41,7 +41,7 @@ # of the very long subject line echo '% pull (minimal config)' hg --traceback --cwd b pull ../a 2>&1 | - python -c 'import sys,re; print re.sub("\n\t", " ", sys.stdin.read()),' | + python -c 'import sys,re; print re.sub("([n:])\\n[\\t ]", "\\1 ", sys.stdin.read()),' | sed -e 's/\(Message-Id:\).*/\1/' \ -e 's/changeset \([0-9a-f]* *\)in .*test-notif/changeset \1in test-notif/' \ -e 's/^details: .*test-notify/details: test-notify/' \