tests/test-encoding-func.py
author Gregory Szorc <gregory.szorc@gmail.com>
Fri, 19 Oct 2018 16:16:17 +0200
changeset 40374 47c03042cd1d
parent 37991 3ea3c96ada54
child 43076 2372284d9457
permissions -rw-r--r--
branchmap: pass changelog into branchmap functions As part of building the branchmap, we loop over revs and call branchmap() or _branchmap(). Previously, these functions were accessing repo.changelog. We know from past experience that repo.changelog in loops is bad for performance. This commit teaches the branchmap code to pass a changelog instance into branchmap() and _branchmap() so we don't need to pay this penalty. On my MBP, this appears to show a speedup on a clone of the mozilla-unified repo: $ hg perfbranchmap --clear-revbranch ! base ! wall 21.078160 comb 21.070000 user 20.920000 sys 0.150000 (best of 3) ! wall 20.574682 comb 20.560000 user 20.400000 sys 0.160000 (best of 3) $ hg perfbranchmap ! base ! wall 4.880413 comb 4.870000 user 4.860000 sys 0.010000 (best of 3) ! wall 4.573968 comb 4.560000 user 4.550000 sys 0.010000 (best of 3) Differential Revision: https://phab.mercurial-scm.org/D5161

from __future__ import absolute_import

import unittest

from mercurial import (
    encoding,
)

class IsasciistrTest(unittest.TestCase):
    asciistrs = [
        b'a',
        b'ab',
        b'abc',
        b'abcd',
        b'abcde',
        b'abcdefghi',
        b'abcd\0fghi',
    ]

    def testascii(self):
        for s in self.asciistrs:
            self.assertTrue(encoding.isasciistr(s))

    def testnonasciichar(self):
        for s in self.asciistrs:
            for i in range(len(s)):
                t = bytearray(s)
                t[i] |= 0x80
                self.assertFalse(encoding.isasciistr(bytes(t)))

class LocalEncodingTest(unittest.TestCase):
    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.tolocal(s))
        self.assertTrue(s is encoding.fromlocal(s))

class Utf8bEncodingTest(unittest.TestCase):
    def setUp(self):
        self.origencoding = encoding.encoding

    def tearDown(self):
        encoding.encoding = self.origencoding

    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.toutf8b(s))
        self.assertTrue(s is encoding.fromutf8b(s))

    def testlossylatin(self):
        encoding.encoding = b'ascii'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'?')  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlosslesslatin(self):
        encoding.encoding = b'latin-1'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc0')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

    def testlossy0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertIn(b'\xed', l)
        self.assertTrue(l.endswith(b'?'))  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlossless0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc5\xed')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

if __name__ == '__main__':
    import silenttestrunner
    silenttestrunner.main(__name__)