view tests/test-encoding-func.py @ 38483:3efadf2317c7

windows: add a method to convert Unix style command lines to Windows style This started as a copy/paste of `os.path.expandvars()`, but limited to a given dictionary of variables, converting `foo = foo + bar` to `foo += bar`, and adding 'b' string prefixes. Then code was added to make sure that a value being substituted in wouldn't itself be expanded by cmd.exe. But that left inconsistent results between `$var1` and `%var1%` when its value was '%foo%'- since neither were touched, `$var1` wouldn't expand but `%var1%` would. So instead, this just converts the Unix style to Windows style (if the variable exists, because Windows will leave `%missing%` as-is), and lets cmd.exe do its thing. I then dropped the %% -> % conversion (because Windows doesn't do this), and added the ability to escape the '$' with '\'. The escape character is dropped, for consistency with shell handling. After everything seemed stable and working, running the whole test suite flagged a problem near the end of test-bookmarks.t:1069. The problem is cmd.exe won't pass empty variables to its child, so defined but empty variables are now skipped. I can't think of anything better, and it seems like a pre-existing violation of the documentation, which calls out that HG_OLDNODE is empty on bookmark creation. Future additions could potentially be replacing strong quotes with double quotes (cmd.exe doesn't know what to do with the former), escaping a double quote, and some tilde expansion via os.path.expanduser(). I've got some doubts about replacing the strong quotes in case sh.exe is run, but it seems like the right thing to do the vast majority of the time. The original form of this was discussed about a year ago[1]. [1] https://www.mercurial-scm.org/pipermail/mercurial-devel/2017-July/100735.html
author Matt Harbison <matt_harbison@yahoo.com>
date Sun, 24 Jun 2018 01:13:09 -0400
parents 3ea3c96ada54
children 2372284d9457
line wrap: on
line source

from __future__ import absolute_import

import unittest

from mercurial import (
    encoding,
)

class IsasciistrTest(unittest.TestCase):
    asciistrs = [
        b'a',
        b'ab',
        b'abc',
        b'abcd',
        b'abcde',
        b'abcdefghi',
        b'abcd\0fghi',
    ]

    def testascii(self):
        for s in self.asciistrs:
            self.assertTrue(encoding.isasciistr(s))

    def testnonasciichar(self):
        for s in self.asciistrs:
            for i in range(len(s)):
                t = bytearray(s)
                t[i] |= 0x80
                self.assertFalse(encoding.isasciistr(bytes(t)))

class LocalEncodingTest(unittest.TestCase):
    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.tolocal(s))
        self.assertTrue(s is encoding.fromlocal(s))

class Utf8bEncodingTest(unittest.TestCase):
    def setUp(self):
        self.origencoding = encoding.encoding

    def tearDown(self):
        encoding.encoding = self.origencoding

    def testasciifastpath(self):
        s = b'\0' * 100
        self.assertTrue(s is encoding.toutf8b(s))
        self.assertTrue(s is encoding.fromutf8b(s))

    def testlossylatin(self):
        encoding.encoding = b'ascii'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'?')  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlosslesslatin(self):
        encoding.encoding = b'latin-1'
        s = u'\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc0')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

    def testlossy0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc\xc0'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertIn(b'\xed', l)
        self.assertTrue(l.endswith(b'?'))  # lossy
        self.assertEqual(s, encoding.toutf8b(l))  # utf8 sequence preserved

    def testlossless0xed(self):
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc'.encode('utf-8')
        l = encoding.tolocal(s)
        self.assertEqual(l, b'\xc5\xed')  # lossless
        self.assertEqual(s, encoding.toutf8b(l))  # convert back to utf-8

if __name__ == '__main__':
    import silenttestrunner
    silenttestrunner.main(__name__)