Mercurial > hg
changeset 31439:b70407bd84d5
pycompat: add bytestr wrapper which mostly acts as a Python 2 str
This allows us to handle bytes in mostly the same manner as Python 2 str,
so we can get rid of ugly s[i:i + 1] hacks:
    s = bytestr(s)
    while i < len(s):
        c = s[i]
        ...
This is a simpler version of the previous RFC patch, which tried to preserve
the bytestr type if possible. The new version simply drops the bytestr wrapping
so we aren't likely to pass a bytestr to a function that expects Python 3
bytes.
author | Yuya Nishihara <yuya@tcha.org> |
---|---|
date | Wed, 08 Mar 2017 22:48:26 +0900 |
parents | 82350f7fa56c |
children | f784ba187089 |
files | mercurial/pycompat.py tests/test-doctest.py |
diffstat | 2 files changed, 63 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/mercurial/pycompat.py Wed Mar 08 22:13:32 2017 +0900 +++ b/mercurial/pycompat.py Wed Mar 08 22:48:26 2017 +0900 @@ -76,6 +76,67 @@ bytechr = struct.Struct('>B').pack + class bytestr(bytes): + """A bytes which mostly acts as a Python 2 str + + >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1) + (b'', b'foo', b'ascii', b'1') + >>> s = bytestr(b'foo') + >>> assert s is bytestr(s) + + There's no implicit conversion from non-ascii str as its encoding is + unknown: + + >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS + Traceback (most recent call last): + ... + UnicodeEncodeError: ... + + Comparison between bytestr and bytes should work: + + >>> assert bytestr(b'foo') == b'foo' + >>> assert b'foo' == bytestr(b'foo') + >>> assert b'f' in bytestr(b'foo') + >>> assert bytestr(b'f') in b'foo' + + Sliced elements should be bytes, not integer: + + >>> s[1], s[:2] + (b'o', b'fo') + >>> list(s), list(reversed(s)) + ([b'f', b'o', b'o'], [b'o', b'o', b'f']) + + As bytestr type isn't propagated across operations, you need to cast + bytes to bytestr explicitly: + + >>> s = bytestr(b'foo').upper() + >>> t = bytestr(s) + >>> s[0], t[0] + (70, b'F') + + Be careful to not pass a bytestr object to a function which expects + bytearray-like behavior. + + >>> t = bytes(t) # cast to bytes + >>> assert type(t) is bytes + """ + + def __new__(cls, s=b''): + if isinstance(s, bytestr): + return s + if not isinstance(s, (bytes, bytearray)): + s = str(s).encode(u'ascii') + return bytes.__new__(cls, s) + + def __getitem__(self, key): + s = bytes.__getitem__(self, key) + if not isinstance(s, bytes): + s = bytechr(s) + return s + + def __iter__(self): + return iterbytestr(bytes.__iter__(self)) + def iterbytestr(s): """Iterate bytes as if it were a str object of Python 2""" return map(bytechr, s) @@ -146,6 +207,7 @@ import cStringIO bytechr = chr + bytestr = str iterbytestr = iter def sysstr(s):
--- a/tests/test-doctest.py Wed Mar 08 22:13:32 2017 +0900 +++ b/tests/test-doctest.py Wed Mar 08 22:48:26 2017 +0900 @@ -34,6 +34,7 @@ testmod('mercurial.patch') testmod('mercurial.pathutil') testmod('mercurial.parser') +testmod('mercurial.pycompat', py3=True) testmod('mercurial.revsetlang') testmod('mercurial.smartset') testmod('mercurial.store')