comparison mercurial/__init__.py @ 29800:178c89e8519a

py3: import builtin wrappers automagically by code transformer

This should be less invasive than mucking with builtins. Since
tokenize.untokenize() looks at the start/end positions of tokens, we
calculate them from the NEWLINE token of the future import.
author Yuya Nishihara <yuya@tcha.org>
date Tue, 16 Aug 2016 12:35:15 +0900
parents 1c22400db72d
children 3139ec39b505
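
As a minimal standalone sketch of the trick the message describes (the
source snippet and the injected "; import os" line are illustrative, not
part of the patch): tokenize.untokenize() reproduces whitespace from each
token's (row, col) start/end, so tokens spliced in at the end of the
future-import line must be re-based to that line's row and column. Because
the injected text supplies its own trailing NEWLINE token and contains no
extra '\n', every following line keeps its original number.

    import io
    import token
    import tokenize

    def inject(tokens):
        for t in tokens:
            # Replace the NEWLINE ending line 1 with "; import os\n",
            # shifted to the NEWLINE's (row, col) so untokenize() lays
            # the new tokens out on the same physical line.
            if t.type == token.NEWLINE and t.start[0] == 1:
                r, c = t.start
                extra = b'; import os\n'
                for u in tokenize.tokenize(io.BytesIO(extra).readline):
                    if u.type in (tokenize.ENCODING, token.ENDMARKER):
                        continue
                    yield tokenize.TokenInfo(u.type, u.string,
                                             (r, c + u.start[1]),
                                             (r, c + u.end[1]),
                                             '')
                continue
            yield t

    src = b'from __future__ import absolute_import\nx = 1\n'
    buf = io.BytesIO(src)
    out = tokenize.untokenize(inject(tokenize.tokenize(buf.readline)))
    print(out.decode('utf-8'))
    # from __future__ import absolute_import; import os
    # x = 1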
--- a/mercurial/__init__.py	29799:45fa8de47a0f
+++ b/mercurial/__init__.py	29800:178c89e8519a
@@ -168,11 +168,11 @@
         # TODO need to support loaders from alternate specs, like zip
         # loaders.
         spec.loader = hgloader(spec.name, spec.origin)
         return spec
 
-def replacetokens(tokens):
+def replacetokens(tokens, fullname):
     """Transform a stream of tokens from raw to Python 3.
 
     It is called by the custom module loading machinery to rewrite
     source/tokens between source decoding and compilation.
 
@@ -182,10 +182,11 @@
     its changes do not necessarily match the output token stream.
 
     REMEMBER TO CHANGE ``BYTECODEHEADER`` WHEN CHANGING THIS FUNCTION
     OR CACHED FILES WON'T GET INVALIDATED PROPERLY.
     """
+    futureimpline = False
     for i, t in enumerate(tokens):
         # Convert most string literals to byte literals. String literals
         # in Python 2 are bytes. String literals in Python 3 are unicode.
         # Most strings in Mercurial are bytes and unicode strings are rare.
         # Rather than rewrite all string literals to use ``b''`` to indicate
@@ -213,10 +214,33 @@
                 continue
 
             # String literal. Prefix to make a b'' string.
             yield tokenize.TokenInfo(t.type, 'b%s' % s, t.start, t.end,
                                      t.line)
+            continue
+
+        # Insert compatibility imports at "from __future__ import" line.
+        # No '\n' should be added to preserve line numbers.
+        if (t.type == token.NAME and t.string == 'import' and
+            all(u.type == token.NAME for u in tokens[i - 2:i]) and
+            [u.string for u in tokens[i - 2:i]] == ['from', '__future__']):
+            futureimpline = True
+        if t.type == token.NEWLINE and futureimpline:
+            futureimpline = False
+            if fullname == 'mercurial.pycompat':
+                yield t
+                continue
+            r, c = t.start
+            l = (b'; from mercurial.pycompat import '
+                 b'delattr, getattr, hasattr, setattr, xrange\n')
+            for u in tokenize.tokenize(io.BytesIO(l).readline):
+                if u.type in (tokenize.ENCODING, token.ENDMARKER):
+                    continue
+                yield tokenize.TokenInfo(u.type, u.string,
+                                         (r, c + u.start[1]),
+                                         (r, c + u.end[1]),
+                                         '')
             continue
 
         try:
             nexttoken = tokens[i + 1]
         except IndexError:
@@ -277,11 +301,11 @@
 
 # Header to add to bytecode files. This MUST be changed when
 # ``replacetoken`` or any mechanism that changes semantics of module
 # loading is changed. Otherwise cached bytecode may get loaded without
 # the new transformation mechanisms applied.
-BYTECODEHEADER = b'HG\x00\x01'
+BYTECODEHEADER = b'HG\x00\x02'
 
 class hgloader(importlib.machinery.SourceFileLoader):
     """Custom module loader that transforms source code.
 
     When the source code is converted to a code object, we transform
@@ -336,11 +360,11 @@
 
     def source_to_code(self, data, path):
         """Perform token transformation before compilation."""
        buf = io.BytesIO(data)
         tokens = tokenize.tokenize(buf.readline)
-        data = tokenize.untokenize(replacetokens(list(tokens)))
+        data = tokenize.untokenize(replacetokens(list(tokens), self.name))
         # Python's built-in importer strips frames from exceptions raised
         # for this code. Unfortunately, that mechanism isn't extensible
         # and our frame will be blamed for the import failure. There
         # are extremely hacky ways to do frame stripping. We haven't
         # implemented them because they are very ugly.
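
A rough harness for the path exercised by source_to_code() above. The
helper name and the test module are made up; running it assumes Python 3
with this patched mercurial package on sys.path, since replacetokens()
lives in mercurial/__init__.py and the injected line imports from
mercurial.pycompat at exec time:

    import io
    import tokenize
    from mercurial import replacetokens

    def transformed(data, fullname):
        # Mirror source_to_code(): retokenize, transform, re-serialize,
        # then compile the rewritten source bytes.
        buf = io.BytesIO(data)
        tokens = list(tokenize.tokenize(buf.readline))
        return compile(tokenize.untokenize(replacetokens(tokens, fullname)),
                       '<transformed>', 'exec')

    src = (b'from __future__ import absolute_import\n'
           b'\n'
           b'def f():\n'
           b"    return 'x'\n")
    ns = {}
    exec(transformed(src, 'testmod'), ns)
    assert ns['f'].__code__.co_firstlineno == 3  # line numbers preserved
    assert ns['f']() == b'x'                     # 'x' rewritten to b'x'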
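
For context on the BYTECODEHEADER bump above: in parts of this file outside
the hunks shown, the loader prepends the header when bytecode is written
and raises OSError on a mismatch when it is read back, which is how
SourceFileLoader.get_code() is told a cached .pyc is stale and falls back
to re-transforming the source. A condensed sketch of that pattern, not a
verbatim excerpt (the error message text is illustrative):

    import importlib.machinery

    BYTECODEHEADER = b'HG\x00\x02'

    class hgloader(importlib.machinery.SourceFileLoader):
        def get_data(self, path):
            data = super().get_data(path)
            if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                # A missing or outdated header makes get_code() treat the
                # cached bytecode as bad and recompile from source.
                if data[0:4] != BYTECODEHEADER:
                    raise OSError('hg bytecode header mismatch')
                data = data[4:]
            return data

        def set_data(self, path, data, *args, **kwargs):
            # Stamp fresh bytecode with the current transform version.
            if path.endswith(tuple(importlib.machinery.BYTECODE_SUFFIXES)):
                data = BYTECODEHEADER + data
            return super().set_data(path, data, *args, **kwargs)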