view tests/test-encoding-func.py @ 44261:04a3ae7aba14

chg: force-set LC_CTYPE on server start to actual value from the environment Python 3.7+ will "coerce" the LC_CTYPE variable in many instances, and this can cause issues with chg being able to start up. D7550 attempted to fix this, but a combination of a misreading of the way that python3.7 does the coercion and an untested state (LC_CTYPE being set to an invalid value) meant that this was still not quite working. This change will cause differences between chg and hg: hg will have the LC_CTYPE environment variable coerced, while chg will not. This is unlikely to cause any detectable behavior differences in what Mercurial itself outputs, but it does have two known effects: - When using hg, the coerced LC_CTYPE will be passed to subprocesses, even non-python ones. Using chg will remove the coercion, and this will not happen. This is arguably more correct behavior on chg's part. - On macOS, if you set your region to Brazil but your language to English, this isn't representable in locale strings, so macOS sets LC_CTYPE=UTF-8. If this value is passed along when ssh'ing to a non-macOS machine, some functions (such as locale.setlocale()) may raise an exception due to an unsupported locale setting. This is most easily encountered when doing an interactive commit/split/etc. when using ui.interface=curses. Differential Revision: https://phab.mercurial-scm.org/D8039
author Kyle Lippincott <spectral@google.com>
date Wed, 29 Jan 2020 13:39:50 -0800
parents 2372284d9457
children 6000f5b25c9b
line wrap: on
line source

from __future__ import absolute_import

import unittest

from mercurial import encoding


class IsasciistrTest(unittest.TestCase):
    asciistrs = [
        b'a',
        b'ab',
        b'abc',
        b'abcd',
        b'abcde',
        b'abcdefghi',
        b'abcd\0fghi',
    ]

    def testascii(self):
        for s in self.asciistrs:
            self.assertTrue(encoding.isasciistr(s))

    def testnonasciichar(self):
        for s in self.asciistrs:
            for i in range(len(s)):
                t = bytearray(s)
                t[i] |= 0x80
                self.assertFalse(encoding.isasciistr(bytes(t)))


class LocalEncodingTest(unittest.TestCase):
    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.tolocal(s))
        self.assertTrue(s is encoding.fromlocal(s))


class Utf8bEncodingTest(unittest.TestCase):
    def setUp(self):
        self.origencoding = encoding.encoding

    def tearDown(self):
        encoding.encoding = self.origencoding

    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.toutf8b(s))
        self.assertTrue(s is encoding.fromutf8b(s))

    def testlossylatin(self):
        encoding.encoding = b'ascii'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'?')  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlosslesslatin(self):
        encoding.encoding = b'latin-1'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc0')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

    def testlossy0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertIn(b'\xed', l)
        self.assertTrue(l.endswith(b'?'))  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlossless0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc5\xed')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8


if __name__ == '__main__':
    import silenttestrunner

    silenttestrunner.main(__name__)