tests/test-encoding-func.py
author Martin von Zweigbergk <martinvonz@google.com>
Sun, 29 Apr 2018 10:07:40 -0700
changeset 38894 7848f284b211
parent 37991 3ea3c96ada54
child 43076 2372284d9457
permissions -rw-r--r--
revisions: allow "x123" to refer to nodeid prefix "123" When resolving "123" to a revision, we try to interpret it as revnum before we try to interpret it as a nodeid hex prefix. This can lead to the shortest valid prefix being longer than necessary. This patch lets us write such nodeids in a shorter form by prefixing them with "x" instead of adding more hex digits until they're longer than the longest decimal revnum. On my hg repo with almost 69k revisions, turning this feature on saves on average 0.4% on the average nodeid length. That clearly doesn't justify this patch. However, it becomes more usefule when combined with the earlier patches in this series that let you disambiguate nodeid prefixes within a configured revset. Note that we attempt to resolve symbols as nodeid prefixes after we've exhausted all other posibilities, so this is a backwards compatible change (only queries that would previously fail may now succeed). I've still hidden this feature behind an experiemntal config option so we can roll it back if needed. Differential Revision: https://phab.mercurial-scm.org/D4041

from __future__ import absolute_import

import unittest

from mercurial import (
    encoding,
)

class IsasciistrTest(unittest.TestCase):
    asciistrs = [
        b'a',
        b'ab',
        b'abc',
        b'abcd',
        b'abcde',
        b'abcdefghi',
        b'abcd\0fghi',
    ]

    def testascii(self):
        for s in self.asciistrs:
            self.assertTrue(encoding.isasciistr(s))

    def testnonasciichar(self):
        for s in self.asciistrs:
            for i in range(len(s)):
                t = bytearray(s)
                t[i] |= 0x80
                self.assertFalse(encoding.isasciistr(bytes(t)))

class LocalEncodingTest(unittest.TestCase):
    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.tolocal(s))
        self.assertTrue(s is encoding.fromlocal(s))

class Utf8bEncodingTest(unittest.TestCase):
    def setUp(self):
        self.origencoding = encoding.encoding

    def tearDown(self):
        encoding.encoding = self.origencoding

    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.toutf8b(s))
        self.assertTrue(s is encoding.fromutf8b(s))

    def testlossylatin(self):
        encoding.encoding = b'ascii'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'?')  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlosslesslatin(self):
        encoding.encoding = b'latin-1'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc0')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

    def testlossy0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertIn(b'\xed', l)
        self.assertTrue(l.endswith(b'?'))  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlossless0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc5\xed')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

if __name__ == '__main__':
    import silenttestrunner
    silenttestrunner.main(__name__)