tests/test-encoding-func.py
author Martin von Zweigbergk <martinvonz@google.com>
Thu, 18 Apr 2019 00:40:53 -0700
changeset 42500 4c39c99d9492
parent 37991 3ea3c96ada54
child 43076 2372284d9457
permissions -rw-r--r--
copies: do full filtering at end of _changesetforwardcopies() As mentioned earlier, pathcopies() is very slow when copies are stored in the changeset. Most of the cost comes from calling _chain() for every changeset, which is slow because it needs to read manifests. It needs to read manifests to be able to filter out copies that are were created in one commit and then deleted. (It also filters out copies that were created from a file that didn't exist in the starting revision, but that's a fixed revision across calls to _chain(), so it's much cheaper.) This patch changes from _chainandfilter() to just _chain() in the main loop in _changesetforwardcopies(). It instead removes copies that have subsequently been removed by using ctx.filesremoved(). We thus rely on that to be fast. It timed this command in mozilla-unified: hg debugpathcopies FIREFOX_59_0b3_BUILD2 FIREFOX_BETA_59_END It took 18s before and 1.1s after. It's still faster when copy information is stored in filelogs: 0.70s. It also still gets slow when there are merge commits involved, because we read manifests there too. We'll deal with that later. Differential Revision: https://phab.mercurial-scm.org/D6419

from __future__ import absolute_import

import unittest

from mercurial import (
    encoding,
)

class IsasciistrTest(unittest.TestCase):
    asciistrs = [
        b'a',
        b'ab',
        b'abc',
        b'abcd',
        b'abcde',
        b'abcdefghi',
        b'abcd\0fghi',
    ]

    def testascii(self):
        for s in self.asciistrs:
            self.assertTrue(encoding.isasciistr(s))

    def testnonasciichar(self):
        for s in self.asciistrs:
            for i in range(len(s)):
                t = bytearray(s)
                t[i] |= 0x80
                self.assertFalse(encoding.isasciistr(bytes(t)))

class LocalEncodingTest(unittest.TestCase):
    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.tolocal(s))
        self.assertTrue(s is encoding.fromlocal(s))

class Utf8bEncodingTest(unittest.TestCase):
    def setUp(self):
        self.origencoding = encoding.encoding

    def tearDown(self):
        encoding.encoding = self.origencoding

    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.toutf8b(s))
        self.assertTrue(s is encoding.fromutf8b(s))

    def testlossylatin(self):
        encoding.encoding = b'ascii'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'?')  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlosslesslatin(self):
        encoding.encoding = b'latin-1'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc0')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

    def testlossy0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertIn(b'\xed', l)
        self.assertTrue(l.endswith(b'?'))  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlossless0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc5\xed')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

if __name__ == '__main__':
    import silenttestrunner
    silenttestrunner.main(__name__)