Mercurial > hg
view tests/test-encoding-func.py @ 40053:8c692a6b5ad1
fuzz: new fuzzer for cext/manifest.c
This is a bit messy, because lazymanifest is tightly coupled to the
cpython API for performance reasons. As a result, we have to build a
whole Python without pymalloc (so ASAN can help us out) and link
against that. Then we have to use an embedded Python interpreter. We
could manually drive the lazymanifest in C from that point, but
experimentally just using PyEval_EvalCode isn't really any slower so
we may as well do that and write the innermost guts of the fuzzer in
Python.
Leak detection is currently disabled for this fuzzer because there are
a few global-lifetime things in our extensions that we more or less
intentionally leak and I didn't want to take the detour to work around
that for now.
This should not be pushed to our repo until
https://github.com/google/oss-fuzz/pull/1853 is merged, as this
depends on having the Python tarball around.
Differential Revision: https://phab.mercurial-scm.org/D4879
author | Augie Fackler <augie@google.com> |
---|---|
date | Thu, 06 Sep 2018 02:36:25 -0400 |
parents | 3ea3c96ada54 |
children | 2372284d9457 |
line wrap: on
line source
"""Unit tests for the byte-string helpers exposed by ``mercurial.encoding``.

Covers ``isasciistr``, the identity fast path of ``tolocal``/``fromlocal``
and ``toutf8b``/``fromutf8b`` on ASCII-only input, and the round trip
between ``tolocal`` and ``toutf8b`` under several local encodings.
"""
from __future__ import absolute_import

import unittest

from mercurial import (
    encoding,
)


class IsasciistrTest(unittest.TestCase):
    """Checks ``encoding.isasciistr`` against 7-bit and 8-bit byte strings."""

    # Pure 7-bit inputs: lengths 1 through 5, one of length 9, and one
    # containing an embedded NUL byte.
    asciistrs = [
        b'a',
        b'ab',
        b'abc',
        b'abcd',
        b'abcde',
        b'abcdefghi',
        b'abcd\0fghi',
    ]

    def testascii(self):
        """Every sample in ``asciistrs`` must be reported as ASCII."""
        for sample in self.asciistrs:
            self.assertTrue(encoding.isasciistr(sample))

    def testnonasciichar(self):
        """Setting the high bit of any single byte must be detected."""
        for sample in self.asciistrs:
            for pos in range(len(sample)):
                # Work on a fresh mutable copy so that exactly one byte
                # differs from the original sample on each iteration.
                mutated = bytearray(sample)
                mutated[pos] |= 0x80
                self.assertFalse(encoding.isasciistr(bytes(mutated)))


class LocalEncodingTest(unittest.TestCase):
    """Checks ``encoding.tolocal`` / ``encoding.fromlocal``."""

    def testasciifastpath(self):
        """ASCII-only input must be returned as the very same object."""
        s = b'\0' * 100
        # assertIs (rather than assertTrue(a is b)) reports both operands
        # when the identity check fails.
        self.assertIs(s, encoding.tolocal(s))
        self.assertIs(s, encoding.fromlocal(s))


class Utf8bEncodingTest(unittest.TestCase):
    """Checks ``encoding.toutf8b`` / ``encoding.fromutf8b``.

    Several tests overwrite the module-level ``encoding.encoding``; the
    original value is saved in ``setUp`` and put back in ``tearDown``.
    """

    def setUp(self):
        """Remember the current ``encoding.encoding`` value."""
        self.origencoding = encoding.encoding

    def tearDown(self):
        """Restore the ``encoding.encoding`` value saved by ``setUp``."""
        encoding.encoding = self.origencoding

    def testasciifastpath(self):
        """ASCII-only input must be returned as the very same object."""
        s = b'\0' * 100
        self.assertIs(s, encoding.toutf8b(s))
        self.assertIs(s, encoding.fromutf8b(s))

    def testlossylatin(self):
        """U+00C0 under 'ascii': local form is b'?', UTF-8 is recoverable."""
        encoding.encoding = b'ascii'
        s = u'\xc0'.encode('utf-8')
        local = encoding.tolocal(s)
        self.assertEqual(local, b'?')  # lossy
        self.assertEqual(s, encoding.toutf8b(local))  # utf8 sequence preserved

    def testlosslesslatin(self):
        """U+00C0 under 'latin-1': local form is b'\\xc0' and converts back."""
        encoding.encoding = b'latin-1'
        s = u'\xc0'.encode('utf-8')
        local = encoding.tolocal(s)
        self.assertEqual(local, b'\xc0')  # lossless
        self.assertEqual(s, encoding.toutf8b(local))  # convert back to utf-8

    def testlossy0xed(self):
        """Hangul plus U+00C0 under 'euc-kr': lossy local form, UTF-8 kept."""
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc\xc0'.encode('utf-8')
        local = encoding.tolocal(s)
        self.assertIn(b'\xed', local)
        self.assertTrue(local.endswith(b'?'))  # lossy
        self.assertEqual(s, encoding.toutf8b(local))  # utf8 sequence preserved

    def testlossless0xed(self):
        """Hangul alone under 'euc-kr': b'\\xc5\\xed' locally, converts back."""
        encoding.encoding = b'euc-kr'  # U+Dxxx Hangul
        s = u'\ud1bc'.encode('utf-8')
        local = encoding.tolocal(s)
        self.assertEqual(local, b'\xc5\xed')  # lossless
        self.assertEqual(s, encoding.toutf8b(local))  # convert back to utf-8


if __name__ == '__main__':
    import silenttestrunner

    silenttestrunner.main(__name__)