comparison mercurial/pycompat.py @ 31439:b70407bd84d5

pycompat: add bytestr wrapper which mostly acts as a Python 2 str

This allows us to handle bytes in mostly the same manner as Python 2 str, so we can get rid of ugly s[i:i + 1] hacks:

    s = bytestr(s)
    while i < len(s):
        c = s[i]
        ...

This is the simpler version of the previous RFC patch which tried to preserve the bytestr type if possible. New version simply drops the bytestr wrapping so we aren't likely to pass a bytestr to a function that expects Python 3 bytes.
author Yuya Nishihara <yuya@tcha.org>
date Wed, 08 Mar 2017 22:48:26 +0900
parents 63a39d647888
children a1e40ceee640
comparison
equal deleted inserted replaced
31438:82350f7fa56c 31439:b70407bd84d5
74 if getattr(sys, 'argv', None) is not None: 74 if getattr(sys, 'argv', None) is not None:
75 sysargv = list(map(os.fsencode, sys.argv)) 75 sysargv = list(map(os.fsencode, sys.argv))
76 76
77 bytechr = struct.Struct('>B').pack 77 bytechr = struct.Struct('>B').pack
78 78
79 class bytestr(bytes):
80 """A bytes which mostly acts as a Python 2 str
81
82 >>> bytestr(), bytestr(bytearray(b'foo')), bytestr(u'ascii'), bytestr(1)
83 (b'', b'foo', b'ascii', b'1')
84 >>> s = bytestr(b'foo')
85 >>> assert s is bytestr(s)
86
87 There's no implicit conversion from non-ascii str as its encoding is
88 unknown:
89
90 >>> bytestr(chr(0x80)) # doctest: +ELLIPSIS
91 Traceback (most recent call last):
92 ...
93 UnicodeEncodeError: ...
94
95 Comparison between bytestr and bytes should work:
96
97 >>> assert bytestr(b'foo') == b'foo'
98 >>> assert b'foo' == bytestr(b'foo')
99 >>> assert b'f' in bytestr(b'foo')
100 >>> assert bytestr(b'f') in b'foo'
101
102 Sliced elements should be bytes, not integer:
103
104 >>> s[1], s[:2]
105 (b'o', b'fo')
106 >>> list(s), list(reversed(s))
107 ([b'f', b'o', b'o'], [b'o', b'o', b'f'])
108
109 As bytestr type isn't propagated across operations, you need to cast
110 bytes to bytestr explicitly:
111
112 >>> s = bytestr(b'foo').upper()
113 >>> t = bytestr(s)
114 >>> s[0], t[0]
115 (70, b'F')
116
117 Be careful to not pass a bytestr object to a function which expects
118 bytearray-like behavior.
119
120 >>> t = bytes(t) # cast to bytes
121 >>> assert type(t) is bytes
122 """
123
124 def __new__(cls, s=b''):
125 if isinstance(s, bytestr):
126 return s
127 if not isinstance(s, (bytes, bytearray)):
128 s = str(s).encode(u'ascii')
129 return bytes.__new__(cls, s)
130
131 def __getitem__(self, key):
132 s = bytes.__getitem__(self, key)
133 if not isinstance(s, bytes):
134 s = bytechr(s)
135 return s
136
137 def __iter__(self):
138 return iterbytestr(bytes.__iter__(self))
139
79 def iterbytestr(s): 140 def iterbytestr(s):
80 """Iterate bytes as if it were a str object of Python 2""" 141 """Iterate bytes as if it were a str object of Python 2"""
81 return map(bytechr, s) 142 return map(bytechr, s)
82 143
83 def sysstr(s): 144 def sysstr(s):
144 205
145 else: 206 else:
146 import cStringIO 207 import cStringIO
147 208
148 bytechr = chr 209 bytechr = chr
210 bytestr = str
149 iterbytestr = iter 211 iterbytestr = iter
150 212
151 def sysstr(s): 213 def sysstr(s):
152 return s 214 return s
153 215